use std::time::Instant;
use zipora::{BitVector, BitwiseOp, RankSelect256, RankSelectOps, RankSelectPerformanceOps};
use zipora::system::CpuFeatures;
fn main() -> Result<(), Box<dyn std::error::Error>> {
println!("🚀 SIMD Optimization Demo - Advanced Hardware Acceleration");
println!("================================================================\n");
demonstrate_cpu_features();
let test_data = create_test_data(100_000);
println!("📊 Created test dataset with {} bits", test_data.len());
println!(" Set bits: {}", test_data.count_ones());
println!(
" Density: {:.2}%\n",
(test_data.count_ones() as f64 / test_data.len() as f64) * 100.0
);
let rs = RankSelect256::new(test_data.clone())?;
println!("🏗️ Built RankSelect256 structure");
println!(" Space overhead: {:.2}%\n", rs.space_overhead_percent());
demonstrate_rank_performance(&rs);
demonstrate_select_performance(&rs);
demonstrate_simd_bulk_operations(test_data);
println!("✅ SIMD Optimization Demo completed successfully!");
Ok(())
}
fn demonstrate_cpu_features() {
println!("🔍 CPU Feature Detection");
println!("========================");
let features = zipora::system::get_cpu_features();
println!("Detected CPU features:");
println!(
" POPCNT: {}",
if features.has_popcnt {
"✅ Available"
} else {
"❌ Not available"
}
);
println!(
" BMI2: {}",
if features.has_bmi2 {
"✅ Available"
} else {
"❌ Not available"
}
);
println!(
" AVX2: {}",
if features.has_avx2 {
"✅ Available"
} else {
"❌ Not available"
}
);
println!("\nOptimization levels available:");
if features.has_popcnt {
println!(" 📈 Hardware POPCNT: 2-5x faster rank operations");
}
if features.has_bmi2 {
println!(" 🚀 Hardware BMI2: 5-10x faster select operations");
}
if features.has_avx2 {
println!(" ⚡ SIMD AVX2: Vectorized bulk operations");
}
if !features.has_popcnt && !features.has_bmi2 && !features.has_avx2 {
println!(" 📚 Lookup tables: 10-100x faster than naive implementation");
}
println!();
}
fn create_test_data(size: usize) -> BitVector {
let mut bv = BitVector::new();
for i in 0..size {
let bit = match i % 10000 {
0..=1000 => i % 7 == 0, 1001..=5000 => i % 137 == 0, 5001..=8000 => i % 3 == 0, _ => i % 1013 == 0, };
bv.push(bit).unwrap();
}
bv
}
fn demonstrate_rank_performance(rs: &RankSelect256) {
println!("⚡ Rank Operation Performance Comparison");
println!("========================================");
let test_positions: Vec<usize> = (0..rs.len()).step_by(rs.len() / 1000).collect();
let iterations = 1000;
println!(
"Testing {} rank operations across {} iterations...",
test_positions.len(),
iterations
);
let start = Instant::now();
for _ in 0..iterations {
for &pos in &test_positions {
let _ = rs.rank1_hardware_accelerated(pos);
}
}
let lookup_time = start.elapsed();
let start = Instant::now();
for _ in 0..iterations {
for &pos in &test_positions {
let _ = rs.rank1_hardware_accelerated(pos);
}
}
let hardware_time = start.elapsed();
let start = Instant::now();
for _ in 0..iterations {
for &pos in &test_positions {
let _ = rs.rank1_adaptive(pos);
}
}
let adaptive_time = start.elapsed();
println!("Results:");
println!(
" Lookup tables: {:8.2} ms",
lookup_time.as_secs_f64() * 1000.0
);
println!(
" Hardware-accel: {:8.2} ms",
hardware_time.as_secs_f64() * 1000.0
);
println!(
" Adaptive (best): {:8.2} ms",
adaptive_time.as_secs_f64() * 1000.0
);
if hardware_time < lookup_time {
let speedup = lookup_time.as_secs_f64() / hardware_time.as_secs_f64();
println!(" 🚀 Hardware speedup: {:.1}x faster", speedup);
} else {
println!(" 📚 Lookup tables are optimal on this CPU");
}
println!();
}
fn demonstrate_select_performance(rs: &RankSelect256) {
println!("🎯 Select Operation Performance Comparison");
println!("===========================================");
let ones_count = rs.count_ones();
if ones_count == 0 {
println!("No set bits in test data, skipping select performance test.\n");
return;
}
let test_ks: Vec<usize> = (0..ones_count).step_by(ones_count.max(1) / 500).collect();
let iterations = 500;
println!(
"Testing {} select operations across {} iterations...",
test_ks.len(),
iterations
);
let start = Instant::now();
for _ in 0..iterations {
for &k in &test_ks {
let _ = rs.select1_hardware_accelerated(k);
}
}
let lookup_time = start.elapsed();
let start = Instant::now();
for _ in 0..iterations {
for &k in &test_ks {
let _ = rs.select1_hardware_accelerated(k);
}
}
let hardware_time = start.elapsed();
let start = Instant::now();
for _ in 0..iterations {
for &k in &test_ks {
let _ = rs.select1_adaptive(k);
}
}
let adaptive_time = start.elapsed();
println!("Results:");
println!(
" Lookup tables: {:8.2} ms",
lookup_time.as_secs_f64() * 1000.0
);
println!(
" Hardware-accel: {:8.2} ms",
hardware_time.as_secs_f64() * 1000.0
);
println!(
" Adaptive (best): {:8.2} ms",
adaptive_time.as_secs_f64() * 1000.0
);
if hardware_time < lookup_time {
let speedup = lookup_time.as_secs_f64() / hardware_time.as_secs_f64();
println!(" 🚀 Hardware speedup: {:.1}x faster", speedup);
} else {
println!(" 📚 Lookup tables are optimal on this CPU");
}
println!();
}
fn demonstrate_simd_bulk_operations(mut test_data: BitVector) {
println!("🔄 SIMD Bulk Operations Demo");
println!("============================");
let positions: Vec<usize> = (0..test_data.len()).step_by(1000).collect();
println!(
"Testing bulk rank operations on {} positions...",
positions.len()
);
let start = Instant::now();
let bulk_ranks = test_data.rank1_bulk_simd(&positions);
let bulk_time = start.elapsed();
let start = Instant::now();
let individual_ranks: Vec<usize> = positions.iter().map(|&pos| test_data.rank1(pos)).collect();
let individual_time = start.elapsed();
println!(
" Bulk SIMD: {:8.2} ms",
bulk_time.as_secs_f64() * 1000.0
);
println!(
" Individual: {:8.2} ms",
individual_time.as_secs_f64() * 1000.0
);
assert_eq!(
bulk_ranks, individual_ranks,
"Bulk rank results must match individual results"
);
println!(" ✅ Results verified");
if individual_time > bulk_time {
let speedup = individual_time.as_secs_f64() / bulk_time.as_secs_f64();
println!(" 🚀 Bulk speedup: {:.1}x faster", speedup);
}
println!("\nTesting SIMD range setting...");
let mut bv_copy = test_data.clone();
let start = Instant::now();
bv_copy.set_range_simd(1000, 5000, true).unwrap();
let simd_time = start.elapsed();
let mut bv_individual = test_data.clone();
let start = Instant::now();
for i in 1000..5000 {
bv_individual.set(i, true).unwrap();
}
let individual_time = start.elapsed();
println!(
" SIMD range: {:8.2} ms",
simd_time.as_secs_f64() * 1000.0
);
println!(
" Individual: {:8.2} ms",
individual_time.as_secs_f64() * 1000.0
);
for i in 1000..5000 {
assert_eq!(
bv_copy.get(i),
bv_individual.get(i),
"Range setting results must match at position {}",
i
);
}
println!(" ✅ Results verified");
if individual_time > simd_time {
let speedup = individual_time.as_secs_f64() / simd_time.as_secs_f64();
println!(" 🚀 SIMD speedup: {:.1}x faster", speedup);
}
println!("\nTesting SIMD bulk bitwise operations...");
let mut other_bv = BitVector::new();
for i in 0..test_data.len() {
other_bv.push(i % 5 == 0).unwrap();
}
let mut bv_simd = test_data.clone();
let start = Instant::now();
bv_simd
.bulk_bitwise_op_simd(&other_bv, BitwiseOp::And, 0, 10000)
.unwrap();
let simd_time = start.elapsed();
let mut bv_individual = test_data.clone();
let start = Instant::now();
for i in 0..10000 {
let self_bit = bv_individual.get(i).unwrap_or(false);
let other_bit = other_bv.get(i).unwrap_or(false);
bv_individual.set(i, self_bit & other_bit).unwrap();
}
let individual_time = start.elapsed();
println!(
" SIMD bitwise: {:8.2} ms",
simd_time.as_secs_f64() * 1000.0
);
println!(
" Individual: {:8.2} ms",
individual_time.as_secs_f64() * 1000.0
);
for i in 0..10000 {
assert_eq!(
bv_simd.get(i),
bv_individual.get(i),
"Bitwise operation results must match at position {}",
i
);
}
println!(" ✅ Results verified");
if individual_time > simd_time {
let speedup = individual_time.as_secs_f64() / simd_time.as_secs_f64();
println!(" 🚀 SIMD speedup: {:.1}x faster", speedup);
}
println!();
}