use crate::error::{Result, ZiporaError};
/// Snapshot of the x86-64 CPU features relevant to rank/select
/// acceleration, plus the tuning knobs derived from them.
#[derive(Debug, Clone)]
pub struct AdaptiveCpuCapabilities {
    /// BMI1 instruction set available.
    pub has_bmi1: bool,
    /// BMI2 instruction set available.
    pub has_bmi2: bool,
    /// Hardware POPCNT instruction available.
    pub has_popcnt: bool,
    /// AVX2 256-bit SIMD available.
    pub has_avx2: bool,
    /// Derived tier in `0..=4`; higher means more hardware acceleration.
    pub optimization_tier: u8,
    /// Preferred processing chunk size for this tier.
    pub chunk_size: usize,
}

impl AdaptiveCpuCapabilities {
    /// Probes the current CPU and derives the optimization tier and chunk size.
    ///
    /// Tier ladder (each tier implies everything below it):
    /// 4 = POPCNT+BMI1+BMI2+AVX2, 3 = POPCNT+BMI1+BMI2,
    /// 2 = POPCNT+BMI1, 1 = POPCNT only, 0 = no acceleration.
    pub fn detect() -> Self {
        let has_bmi1 = Self::detect_bmi1();
        let has_bmi2 = Self::detect_bmi2();
        let has_popcnt = Self::detect_popcnt();
        let has_avx2 = Self::detect_avx2();
        // Bug fix: the previous `match` classified the unusual combination
        // (POPCNT, no BMI1, BMI2) as tier 0 even though POPCNT alone already
        // qualifies for tier 1. An explicit ladder keeps the tiers monotone:
        // any CPU with POPCNT is at least tier 1.
        let optimization_tier = if has_popcnt && has_bmi1 && has_bmi2 && has_avx2 {
            4
        } else if has_popcnt && has_bmi1 && has_bmi2 {
            3
        } else if has_popcnt && has_bmi1 {
            2
        } else if has_popcnt {
            1
        } else {
            0
        };
        // Larger chunks amortize per-chunk overhead better when wider
        // hardware primitives are available.
        let chunk_size = match optimization_tier {
            4 => 1024,
            3 => 512,
            2 => 256,
            1 => 128,
            _ => 64,
        };
        Self {
            has_bmi1,
            has_bmi2,
            has_popcnt,
            has_avx2,
            optimization_tier,
            chunk_size,
        }
    }

    /// Process-wide cached capabilities; hardware detection runs at most once.
    #[inline]
    pub fn get() -> &'static Self {
        static CAPABILITIES: std::sync::OnceLock<AdaptiveCpuCapabilities> =
            std::sync::OnceLock::new();
        CAPABILITIES.get_or_init(Self::detect)
    }

    #[cfg(target_arch = "x86_64")]
    fn detect_bmi1() -> bool {
        std::is_x86_feature_detected!("bmi1")
    }
    /// Non-x86_64 targets report no x86 features.
    #[cfg(not(target_arch = "x86_64"))]
    fn detect_bmi1() -> bool {
        false
    }

    #[cfg(target_arch = "x86_64")]
    fn detect_bmi2() -> bool {
        std::is_x86_feature_detected!("bmi2")
    }
    #[cfg(not(target_arch = "x86_64"))]
    fn detect_bmi2() -> bool {
        false
    }

    #[cfg(target_arch = "x86_64")]
    fn detect_popcnt() -> bool {
        std::is_x86_feature_detected!("popcnt")
    }
    #[cfg(not(target_arch = "x86_64"))]
    fn detect_popcnt() -> bool {
        false
    }

    #[cfg(target_arch = "x86_64")]
    fn detect_avx2() -> bool {
        std::is_x86_feature_detected!("avx2")
    }
    #[cfg(not(target_arch = "x86_64"))]
    fn detect_avx2() -> bool {
        false
    }
}
use crate::succinct::BitVector;
use super::{
RankSelectOps, RankSelectInterleaved256,
};
use std::fmt;
/// Summary statistics computed from a bit vector, used to drive
/// implementation selection and threshold tuning.
#[derive(Debug, Clone)]
pub struct DataProfile {
    /// Total number of bits in the analyzed vector.
    pub total_bits: usize,
    /// Number of set bits.
    pub ones_count: usize,
    /// `ones_count / total_bits`; 0.0 for an empty vector.
    pub density: f64,
    /// Declared or inferred query access pattern.
    pub access_pattern: AccessPattern,
    /// Size bucket derived from `total_bits`.
    pub size_category: SizeCategory,
    /// Normalized measure (0..=1) of how irregular the bit pattern is.
    pub pattern_complexity: f64,
    /// Fraction (0..=1) of 64-bit windows with few internal transitions.
    pub clustering_coefficient: f64,
    /// Shannon entropy of the one/zero distribution, in bits (0..=1).
    pub entropy: f64,
    /// Run-length statistics over runs of ones and zeros.
    pub run_length_stats: RunLengthStats,
    /// CPU optimization tier (0..=4) from `AdaptiveCpuCapabilities`.
    pub hardware_tier: u8,
}
/// Expected query mix for a rank/select structure.
///
/// `Mixed` acts as "unspecified" and may be refined to `Sequential` or
/// `Random` during data analysis; explicit variants are kept as-is.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum AccessPattern {
    /// No dominant pattern (default); analysis may refine this.
    Mixed,
    /// Mostly rank queries.
    RankHeavy,
    /// Mostly select queries.
    SelectHeavy,
    /// Mostly sequential / scanning access.
    Sequential,
    /// Mostly random access.
    Random,
}
/// Size bucket for a bit vector. Boundaries (as assigned by
/// `AdaptiveRankSelect::analyze_data`): Small < 10_000 bits,
/// Medium < 1_000_000, Large < 100_000_000, VeryLarge otherwise.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum SizeCategory {
    /// Fewer than 10K bits.
    Small,
    /// 10K up to 1M bits.
    Medium,
    /// 1M up to 100M bits.
    Large,
    /// 100M bits or more.
    VeryLarge,
}
/// Run-length statistics for a bit vector: lengths of maximal runs of
/// consecutive ones and zeros, plus the number of bit transitions.
#[derive(Debug, Clone)]
pub struct RunLengthStats {
    /// Mean length of runs of ones (0.0 if the vector has no ones).
    pub avg_ones_run: f64,
    /// Mean length of runs of zeros (0.0 if the vector has no zeros).
    pub avg_zeros_run: f64,
    /// Longest run of consecutive ones.
    pub max_ones_run: usize,
    /// Longest run of consecutive zeros.
    pub max_zeros_run: usize,
    /// Number of 0->1 / 1->0 transitions between adjacent bits.
    pub alternations: usize,
}
/// Tunable knobs that steer implementation selection and adaptive
/// threshold tuning.
#[derive(Debug, Clone)]
pub struct SelectionCriteria {
    /// Density below this is considered sparse (clamped to 0.01..=0.25
    /// when adaptive tuning is enabled).
    pub sparse_threshold: f64,
    /// Density above this is considered dense (clamped to 0.75..=0.98
    /// when adaptive tuning is enabled).
    pub dense_threshold: f64,
    // NOTE(review): the three *_dataset_threshold fields are not consulted
    // by the visible selection logic (analyze_data uses hard-coded size
    // buckets) — confirm whether they are used elsewhere or are vestigial.
    pub small_dataset_threshold: usize,
    pub large_dataset_threshold: usize,
    pub very_large_dataset_threshold: usize,
    /// Caller-provided access-pattern hint; `Mixed` lets analysis infer one.
    pub access_pattern: AccessPattern,
    // NOTE(review): enable_select_cache and prefer_space are recorded but
    // not read by the visible code paths — verify against other modules.
    pub enable_select_cache: bool,
    pub prefer_space: bool,
    /// When true, sparse/dense thresholds are adjusted from the data profile.
    pub enable_adaptive_thresholds: bool,
    /// Minimum hardware tier the caller requires (0 = no requirement).
    pub min_hardware_tier: u8,
    /// Weight applied to pattern complexity during threshold tuning.
    pub pattern_complexity_weight: f64,
    /// Weight applied to the clustering coefficient during threshold tuning.
    pub clustering_weight: f64,
}
impl Default for SelectionCriteria {
    /// Conservative defaults: 5% sparse / 90% dense cutoffs, mixed access,
    /// adaptive tuning enabled, no hardware-tier requirement.
    fn default() -> Self {
        Self {
            sparse_threshold: 0.05,
            dense_threshold: 0.90,
            small_dataset_threshold: 10_000,
            large_dataset_threshold: 1_000_000,
            very_large_dataset_threshold: 100_000_000,
            access_pattern: AccessPattern::Mixed,
            enable_select_cache: true,
            prefer_space: false,
            enable_adaptive_thresholds: true,
            min_hardware_tier: 0,
            // Tuning weights: complexity shrinks the sparse threshold,
            // clustering widens it (see tune_thresholds).
            pattern_complexity_weight: 0.3,
            clustering_weight: 0.2,
        }
    }
}
impl Default for RunLengthStats {
    /// Neutral stats used for empty bit vectors.
    ///
    /// NOTE(review): defaults use 1.0/1 for run lengths, while the scanner
    /// in `analyze_run_lengths` reports 0.0/0 when a bit value is absent —
    /// confirm the intended convention.
    fn default() -> Self {
        Self {
            avg_ones_run: 1.0,
            avg_zeros_run: 1.0,
            max_ones_run: 1,
            max_zeros_run: 1,
            alternations: 0,
        }
    }
}
/// Rank/select structure that profiles its input data at construction and
/// records which implementation/configuration was chosen and why.
///
/// NOTE(review): in the visible code every profile resolves to
/// `RankSelectInterleaved256`; the profile only affects the descriptive
/// name and the tuned thresholds.
pub struct AdaptiveRankSelect {
    /// Backing implementation all queries delegate to.
    implementation: RankSelectInterleaved256,
    /// Human-readable description of the selection decision.
    implementation_name: String,
    /// Data profile computed at construction time.
    profile: DataProfile,
    /// Criteria used for (and recorded with) the selection.
    criteria: SelectionCriteria,
}
impl AdaptiveRankSelect {
    /// Builds an adaptive rank/select structure using default criteria.
    pub fn new(bit_vector: BitVector) -> Result<Self> {
        Self::with_criteria(bit_vector, SelectionCriteria::default())
    }

    /// Builds an adaptive rank/select structure with caller-supplied
    /// criteria. Profiles the data (O(n) scan), then constructs the
    /// backing implementation.
    pub fn with_criteria(bit_vector: BitVector, criteria: SelectionCriteria) -> Result<Self> {
        let profile = Self::analyze_data(&bit_vector, &criteria);
        let (implementation, name) = Self::select_implementation(bit_vector, &profile, &criteria)?;
        Ok(Self {
            implementation,
            implementation_name: name,
            profile,
            criteria,
        })
    }

    /// Computes a full `DataProfile` for the vector: density, size bucket,
    /// run lengths, complexity, clustering, entropy, and hardware tier.
    ///
    /// NOTE(review): `criteria` is only consulted for the access-pattern
    /// hint; the size buckets below are hard-coded rather than read from
    /// the `*_dataset_threshold` fields.
    fn analyze_data(bit_vector: &BitVector, criteria: &SelectionCriteria) -> DataProfile {
        let total_bits = bit_vector.len();
        let ones_count = bit_vector.count_ones();
        // Guard against division by zero for empty vectors.
        let density = if total_bits == 0 { 0.0 } else { ones_count as f64 / total_bits as f64 };
        let size_category = match total_bits {
            0..=9999 => SizeCategory::Small,
            10000..=999999 => SizeCategory::Medium,
            1000000..=99999999 => SizeCategory::Large,
            _ => SizeCategory::VeryLarge,
        };
        let run_length_stats = Self::analyze_run_lengths(bit_vector);
        let pattern_complexity = Self::calculate_pattern_complexity(bit_vector, &run_length_stats);
        let clustering_coefficient = Self::calculate_clustering_coefficient(bit_vector);
        let entropy = Self::calculate_entropy(bit_vector);
        // Cached CPU detection; cheap after the first call.
        let hardware_tier = AdaptiveCpuCapabilities::get().optimization_tier;
        let access_pattern = Self::infer_access_pattern(criteria.access_pattern, &run_length_stats, pattern_complexity);
        DataProfile {
            total_bits,
            ones_count,
            density,
            access_pattern,
            size_category,
            pattern_complexity,
            clustering_coefficient,
            entropy,
            run_length_stats,
            hardware_tier,
        }
    }

    /// Single O(n) pass collecting maximal run lengths of ones and zeros
    /// and counting bit transitions (alternations).
    fn analyze_run_lengths(bit_vector: &BitVector) -> RunLengthStats {
        if bit_vector.len() == 0 {
            return RunLengthStats::default();
        }
        let mut ones_runs = Vec::new();
        let mut zeros_runs = Vec::new();
        // Start a run with the first bit; extend while bits repeat.
        let mut current_run = 1;
        let mut current_bit = bit_vector.get(0).unwrap_or(false);
        let mut alternations = 0;
        for i in 1..bit_vector.len() {
            let bit = bit_vector.get(i).unwrap_or(false);
            if bit == current_bit {
                current_run += 1;
            } else {
                // Run ended: record it under its bit value and restart.
                if current_bit {
                    ones_runs.push(current_run);
                } else {
                    zeros_runs.push(current_run);
                }
                current_run = 1;
                current_bit = bit;
                alternations += 1;
            }
        }
        // Flush the final (unterminated) run.
        if current_bit {
            ones_runs.push(current_run);
        } else {
            zeros_runs.push(current_run);
        }
        // Averages are 0.0 when a bit value never occurs (e.g. all-zeros
        // vector has no ones-runs).
        let avg_ones_run = if ones_runs.is_empty() { 0.0 } else {
            ones_runs.iter().sum::<usize>() as f64 / ones_runs.len() as f64
        };
        let avg_zeros_run = if zeros_runs.is_empty() { 0.0 } else {
            zeros_runs.iter().sum::<usize>() as f64 / zeros_runs.len() as f64
        };
        RunLengthStats {
            avg_ones_run,
            avg_zeros_run,
            max_ones_run: ones_runs.iter().max().copied().unwrap_or(0),
            max_zeros_run: zeros_runs.iter().max().copied().unwrap_or(0),
            alternations,
        }
    }

    /// Combines alternation density and run-length variance into a single
    /// 0..=1 complexity score (higher = more irregular pattern).
    fn calculate_pattern_complexity(bit_vector: &BitVector, run_stats: &RunLengthStats) -> f64 {
        if bit_vector.len() == 0 {
            return 0.0;
        }
        let total_bits = bit_vector.len() as f64;
        // Fraction of adjacent pairs that differ.
        let alternation_density = run_stats.alternations as f64 / total_bits;
        let avg_run_length = (run_stats.avg_ones_run + run_stats.avg_zeros_run) / 2.0;
        let max_run_length = run_stats.max_ones_run.max(run_stats.max_zeros_run) as f64;
        // Relative spread of the longest run vs. the average run.
        let run_variance = if avg_run_length > 0.0 {
            (max_run_length - avg_run_length) / avg_run_length
        } else {
            0.0
        };
        // Weighted blend; alternation density counts double, capped at 1.0.
        let complexity = (alternation_density * 2.0 + run_variance.min(1.0)) / 3.0;
        complexity.min(1.0)
    }

    /// Fraction of 64-bit windows that look "clustered" (fewer than 16
    /// internal transitions). 1.0 means every window is homogeneous.
    fn calculate_clustering_coefficient(bit_vector: &BitVector) -> f64 {
        if bit_vector.len() < 3 {
            // Too short to measure; treat as fully clustered.
            return 1.0; }
        let mut clusters = 0;
        let window_size = 64;
        for start in (0..bit_vector.len()).step_by(window_size) {
            let end = (start + window_size).min(bit_vector.len());
            let mut local_transitions = 0;
            // Count adjacent-bit transitions within the window.
            for i in start..end-1 {
                if bit_vector.get(i) != bit_vector.get(i + 1) {
                    local_transitions += 1;
                }
            }
            // Window counts as a cluster if transitions are rare (< 1/4 of
            // the nominal window size, even for a short final window).
            if local_transitions < window_size / 4 {
                clusters += 1;
            }
        }
        // Ceiling division: number of (possibly partial) windows.
        let num_windows = (bit_vector.len() + window_size - 1) / window_size;
        if num_windows == 0 { 1.0 } else { clusters as f64 / num_windows as f64 }
    }

    /// Shannon entropy (in bits) of the one/zero distribution; 1.0 for a
    /// balanced vector, 0.0 for uniform (all-ones / all-zeros / empty).
    fn calculate_entropy(bit_vector: &BitVector) -> f64 {
        if bit_vector.len() == 0 {
            return 0.0;
        }
        let ones = bit_vector.count_ones() as f64;
        let zeros = (bit_vector.len() - bit_vector.count_ones()) as f64;
        let total = bit_vector.len() as f64;
        if ones == 0.0 || zeros == 0.0 {
            // Degenerate distribution: entropy is zero (and avoids log2(0)).
            return 0.0; }
        let p1 = ones / total;
        let p0 = zeros / total;
        -(p1 * p1.log2() + p0 * p0.log2())
    }

    /// Refines an unspecified (`Mixed`) access-pattern hint using run
    /// lengths and complexity; explicit hints are returned unchanged.
    fn infer_access_pattern(
        provided: AccessPattern,
        run_stats: &RunLengthStats,
        complexity: f64,
    ) -> AccessPattern {
        match provided {
            AccessPattern::Mixed => {
                // Long runs or low complexity suggest sequential-friendly
                // data; high complexity suggests random access.
                if run_stats.avg_ones_run > 32.0 || run_stats.avg_zeros_run > 32.0 {
                    AccessPattern::Sequential
                } else if complexity < 0.3 {
                    AccessPattern::Sequential
                } else if complexity > 0.7 {
                    AccessPattern::Random
                } else {
                    AccessPattern::Mixed
                }
            }
            // Explicit caller hints win over inference.
            _ => provided, }
    }

    /// Constructs the backing structure. Currently always
    /// `RankSelectInterleaved256`; the profile/criteria only shape the
    /// descriptive name.
    fn select_implementation(
        bit_vector: BitVector,
        profile: &DataProfile,
        criteria: &SelectionCriteria,
    ) -> Result<(RankSelectInterleaved256, String)> {
        let description = format!("RankSelectInterleaved256 ({})",
            Self::get_description(profile, criteria));
        Ok((
            RankSelectInterleaved256::new(bit_vector)?,
            description,
        ))
    }

    /// Produces the human-readable suffix describing why this data profile
    /// maps to its configuration (sparse/dense/size/access-pattern based).
    fn get_description(profile: &DataProfile, criteria: &SelectionCriteria) -> String {
        let (adaptive_sparse_threshold, adaptive_dense_threshold) = if criteria.enable_adaptive_thresholds {
            Self::tune_thresholds(profile, criteria)
        } else {
            (criteria.sparse_threshold, criteria.dense_threshold)
        };
        if profile.ones_count == 0 {
            "all zeros"
        } else if profile.ones_count == profile.total_bits {
            "all ones"
        } else if profile.density < adaptive_sparse_threshold {
            // NOTE(review): with tuned thresholds (clamped to <= 0.25) the
            // density here is < 0.25, so ones_count * 2 < total_bits always
            // holds and the "sparse zeros" branch is only reachable when
            // adaptive tuning is disabled and a large sparse_threshold is
            // supplied — confirm intent.
            if profile.ones_count * 2 < profile.total_bits {
                "sparse ones optimized"
            } else {
                "sparse zeros optimized"
            }
        } else if profile.density > adaptive_dense_threshold {
            "dense data optimized"
        } else {
            match (profile.size_category, profile.access_pattern) {
                (SizeCategory::Small, _) => "small dataset optimized",
                (SizeCategory::Medium, AccessPattern::Sequential) => "medium sequential optimized",
                (SizeCategory::Medium, AccessPattern::Mixed) => "medium mixed optimized",
                (SizeCategory::Medium, _) => "medium dataset optimized",
                (SizeCategory::Large, AccessPattern::Sequential) => "large sequential optimized",
                (SizeCategory::Large, _) => "large dataset optimized",
                (SizeCategory::VeryLarge, _) => "very large dataset optimized",
            }
        }.to_string()
    }

    /// Adjusts the sparse/dense thresholds from the data profile and
    /// hardware tier, then clamps them to sane ranges
    /// (sparse: 0.01..=0.25, dense: 0.75..=0.98).
    fn tune_thresholds(profile: &DataProfile, criteria: &SelectionCriteria) -> (f64, f64) {
        let mut sparse_threshold = criteria.sparse_threshold;
        let mut dense_threshold = criteria.dense_threshold;
        let cache_optimization_level = Self::determine_cache_level(profile);
        match cache_optimization_level {
            1 => {
                // Tight locality: widen the sparse band, shrink the dense one.
                sparse_threshold *= 1.5; dense_threshold *= 0.7; },
            4 => {
                // Wide spread: narrow sparse, widen dense.
                sparse_threshold *= 0.8; dense_threshold *= 1.2; },
            _ => {
                // Level 2 (and any other): leave thresholds unchanged.
                sparse_threshold *= 1.0;
                dense_threshold *= 1.0;
            }
        }
        let complexity_factor = profile.pattern_complexity * criteria.pattern_complexity_weight;
        let clustering_factor = profile.clustering_coefficient * criteria.clustering_weight;
        // Strong clustering favors sparse-style handling; high complexity
        // works against it.
        if profile.clustering_coefficient > 0.7 {
            sparse_threshold *= 1.0 + clustering_factor;
        }
        if profile.pattern_complexity > 0.6 {
            sparse_threshold *= 1.0 - complexity_factor;
        }
        // Long runs make "dense" cheaper to hit.
        if profile.run_length_stats.avg_ones_run > 32.0 || profile.run_length_stats.avg_zeros_run > 32.0 {
            dense_threshold *= 0.85; }
        // Capable hardware (tier >= 3) can afford wider sparse handling.
        if profile.hardware_tier >= 3 { sparse_threshold *= 1.3; dense_threshold *= 0.8; }
        sparse_threshold = sparse_threshold.clamp(0.01, 0.25);
        dense_threshold = dense_threshold.clamp(0.75, 0.98);
        (sparse_threshold, dense_threshold)
    }

    /// Heuristic cache-friendliness level (1, 2, or 4) from an estimated
    /// average gap/cluster span.
    ///
    /// NOTE(review): the clustered estimate divides ones_count by the
    /// clustering coefficient, which grows with more ones rather than with
    /// span — confirm the intended formula.
    fn determine_cache_level(profile: &DataProfile) -> u8 {
        let estimated_range = if profile.clustering_coefficient > 0.7 {
            (profile.ones_count as f64 / profile.clustering_coefficient).min(128.0) as usize
        } else {
            // Roughly the average gap between set bits (+1 avoids div by 0).
            (profile.total_bits as f64 / (profile.ones_count as f64 + 1.0)).min(512.0) as usize
        };
        if estimated_range <= 32 {
            1
        } else if estimated_range <= 128 {
            2
        } else {
            4
        }
    }

    /// Human-readable description of the selected implementation.
    pub fn implementation_name(&self) -> &str {
        &self.implementation_name
    }

    /// Data profile computed at construction time.
    pub fn data_profile(&self) -> &DataProfile {
        &self.profile
    }

    /// Criteria this instance was built with.
    pub fn selection_criteria(&self) -> &SelectionCriteria {
        &self.criteria
    }

    /// Snapshot of selection statistics for reporting/diagnostics.
    pub fn optimization_stats(&self) -> OptimizationStats {
        OptimizationStats {
            total_bits: self.profile.total_bits,
            ones_count: self.profile.ones_count,
            density: self.profile.density,
            implementation: self.implementation_name.clone(),
            space_overhead_percent: self.implementation.space_overhead_percent(),
            estimated_performance_tier: self.estimate_performance_tier(),
        }
    }

    /// Currently a stub: always reports `HighPerformance` (matching the
    /// single backing implementation in use).
    fn estimate_performance_tier(&self) -> PerformanceTier {
        PerformanceTier::HighPerformance
    }
}
/// Coarse performance classification reported alongside selection stats.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum PerformanceTier {
    /// Minimal space overhead prioritized over speed.
    SpaceOptimized,
    /// Raw query speed prioritized.
    HighPerformance,
    /// Tuned for sequential scans.
    Sequential,
    /// Middle ground between space and speed.
    Balanced,
}

/// Report describing an `AdaptiveRankSelect` selection decision:
/// data shape, chosen implementation, and its costs.
#[derive(Debug, Clone)]
pub struct OptimizationStats {
    /// Total bits in the indexed vector.
    pub total_bits: usize,
    /// Number of set bits.
    pub ones_count: usize,
    /// Fraction of set bits (0.0..=1.0).
    pub density: f64,
    /// Name/description of the selected implementation.
    pub implementation: String,
    /// Index space overhead relative to the raw data, in percent.
    pub space_overhead_percent: f64,
    /// Estimated performance classification.
    pub estimated_performance_tier: PerformanceTier,
}

impl fmt::Display for OptimizationStats {
    /// Renders a multi-line, human-readable summary. One `writeln!` per
    /// field; the last line uses `write!` so no trailing newline is emitted.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        writeln!(f, "AdaptiveRankSelect Stats:")?;
        writeln!(f, "Total bits: {}", self.total_bits)?;
        writeln!(
            f,
            "Ones count: {} ({:.2}% density)",
            self.ones_count,
            self.density * 100.0
        )?;
        writeln!(f, "Implementation: {}", self.implementation)?;
        writeln!(f, "Space overhead: {:.2}%", self.space_overhead_percent)?;
        write!(
            f,
            "Performance tier: {:?}",
            self.estimated_performance_tier
        )
    }
}
/// All `RankSelectOps` queries delegate directly to the selected backing
/// implementation; `#[inline]` keeps the hot query paths call-free.
impl RankSelectOps for AdaptiveRankSelect {
    #[inline]
    fn rank1(&self, pos: usize) -> usize {
        self.implementation.rank1(pos)
    }
    #[inline]
    fn rank0(&self, pos: usize) -> usize {
        self.implementation.rank0(pos)
    }
    #[inline]
    fn select1(&self, k: usize) -> Result<usize> {
        self.implementation.select1(k)
    }
    #[inline]
    fn select0(&self, k: usize) -> Result<usize> {
        self.implementation.select0(k)
    }
    fn len(&self) -> usize {
        self.implementation.len()
    }
    fn count_ones(&self) -> usize {
        self.implementation.count_ones()
    }
    fn get(&self, index: usize) -> Option<bool> {
        self.implementation.get(index)
    }
    fn space_overhead_percent(&self) -> f64 {
        self.implementation.space_overhead_percent()
    }
}
/// Multi-dimensional rank/select wrapper.
///
/// NOTE(review): despite the name, only ONE backing structure is stored;
/// `dimensions` records the declared dimension count but queries reflect
/// the first dimension only (see `new_dual`).
pub struct AdaptiveMultiDimensional {
    /// Backing implementation (built from the first bit vector).
    implementation: RankSelectInterleaved256,
    /// Human-readable description of the configuration.
    implementation_name: String,
    /// Declared number of dimensions.
    dimensions: usize,
}
impl AdaptiveMultiDimensional {
    /// Creates a two-dimensional wrapper from two equal-length bit vectors.
    ///
    /// # Errors
    /// Returns `ZiporaError::invalid_data` when the lengths differ.
    ///
    /// NOTE(review): `bit_vector2` is only length-checked and then dropped —
    /// the structure indexes `bit_vector1` alone. Confirm whether the second
    /// dimension is intentionally unimplemented.
    pub fn new_dual(bit_vector1: BitVector, bit_vector2: BitVector) -> Result<Self> {
        if bit_vector1.len() != bit_vector2.len() {
            return Err(ZiporaError::invalid_data(
                "Bit vectors must have the same length".to_string()
            ));
        }
        let implementation = RankSelectInterleaved256::new(bit_vector1)?;
        Ok(Self {
            implementation,
            implementation_name: "RankSelectInterleaved256 (dual dimension)".to_string(),
            dimensions: 2,
        })
    }

    /// Human-readable description of the configuration.
    pub fn implementation_name(&self) -> &str {
        &self.implementation_name
    }

    /// Declared number of dimensions.
    pub fn dimensions(&self) -> usize {
        self.dimensions
    }
}
/// Queries delegate to the single backing implementation (first dimension).
impl RankSelectOps for AdaptiveMultiDimensional {
    #[inline]
    fn rank1(&self, pos: usize) -> usize {
        self.implementation.rank1(pos)
    }
    #[inline]
    fn rank0(&self, pos: usize) -> usize {
        self.implementation.rank0(pos)
    }
    #[inline]
    fn select1(&self, k: usize) -> Result<usize> {
        self.implementation.select1(k)
    }
    #[inline]
    fn select0(&self, k: usize) -> Result<usize> {
        self.implementation.select0(k)
    }
    fn len(&self) -> usize {
        self.implementation.len()
    }
    fn count_ones(&self) -> usize {
        self.implementation.count_ones()
    }
    fn get(&self, index: usize) -> Option<bool> {
        self.implementation.get(index)
    }
    fn space_overhead_percent(&self) -> f64 {
        self.implementation.space_overhead_percent()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Bit vector with a one at every multiple of `sparsity` (index 0 set).
    fn create_sparse_bitvector(size: usize, sparsity: usize) -> BitVector {
        let mut bv = BitVector::new();
        for i in 0..size {
            bv.push(i % sparsity == 0).unwrap();
        }
        bv
    }

    /// Bit vector with 90% density (every 10th bit cleared).
    fn create_dense_bitvector(size: usize) -> BitVector {
        let mut bv = BitVector::new();
        for i in 0..size {
            bv.push(i % 10 != 0).unwrap();
        }
        bv
    }

    /// Alternating 1010... bit vector (50% density).
    fn create_balanced_bitvector(size: usize) -> BitVector {
        let mut bv = BitVector::new();
        for i in 0..size {
            bv.push(i % 2 == 0).unwrap();
        }
        bv
    }

    #[test]
    fn test_sparse_data_selection() {
        let sparse_bv = create_sparse_bitvector(10000, 100);
        let adaptive = AdaptiveRankSelect::new(sparse_bv).unwrap();
        assert!(adaptive.implementation_name().contains("RankSelectInterleaved256"));
        assert!(adaptive.data_profile().density < 0.05);
        let rank = adaptive.rank1(1000);
        assert_eq!(rank, 10);
        let pos = adaptive.select1(5).unwrap();
        assert_eq!(pos, 500);
    }

    #[test]
    fn test_dense_data_selection() {
        let dense_bv = create_dense_bitvector(1000);
        let adaptive = AdaptiveRankSelect::new(dense_bv).unwrap();
        println!("Dense test - Selected: {}, Density: {:.3}, Size category: {:?}",
            adaptive.implementation_name(),
            adaptive.data_profile().density,
            adaptive.data_profile().size_category);
        assert!(adaptive.implementation_name().contains("RankSelectInterleaved256"));
        assert!(adaptive.data_profile().density > 0.8);
        let rank = adaptive.rank1(100);
        assert_eq!(rank, 90);
    }

    #[test]
    fn test_balanced_data_selection() {
        let balanced_bv = create_balanced_bitvector(50000);
        let adaptive = AdaptiveRankSelect::new(balanced_bv).unwrap();
        assert!(adaptive.implementation_name().contains("RankSelect"));
        assert!((adaptive.data_profile().density - 0.5).abs() < 0.1);
        let rank = adaptive.rank1(1000);
        assert_eq!(rank, 500);
    }

    #[test]
    fn test_small_dataset_selection() {
        let small_bv = create_balanced_bitvector(5000);
        let adaptive = AdaptiveRankSelect::new(small_bv).unwrap();
        assert!(adaptive.implementation_name().contains("RankSelectInterleaved256"));
        assert_eq!(adaptive.data_profile().size_category, SizeCategory::Small);
    }

    #[test]
    fn test_large_dataset_selection() {
        let large_bv = create_balanced_bitvector(5_000_000);
        let adaptive = AdaptiveRankSelect::new(large_bv).unwrap();
        assert!(adaptive.implementation_name().contains("RankSelectInterleaved256"));
        assert_eq!(adaptive.data_profile().size_category, SizeCategory::Large);
    }

    #[test]
    fn test_custom_criteria() {
        let bv = create_sparse_bitvector(1000, 50);
        let criteria = SelectionCriteria {
            sparse_threshold: 0.025, ..Default::default()
        };
        let adaptive = AdaptiveRankSelect::with_criteria(bv.clone(), criteria).unwrap();
        println!("Custom criteria test - Selected: {}, Density: {:.3}",
            adaptive.implementation_name(),
            adaptive.data_profile().density);
        assert!(adaptive.implementation_name().contains("RankSelectInterleaved256"));
    }

    #[test]
    fn test_edge_cases() {
        // Degenerate inputs: all-zeros and all-ones vectors.
        let all_zeros = BitVector::with_size(1000, false).unwrap();
        let adaptive = AdaptiveRankSelect::new(all_zeros).unwrap();
        assert!(adaptive.implementation_name().contains("RankSelectInterleaved256"));
        assert_eq!(adaptive.count_ones(), 0);
        let all_ones = BitVector::with_size(1000, true).unwrap();
        let adaptive = AdaptiveRankSelect::new(all_ones).unwrap();
        assert!(adaptive.implementation_name().contains("RankSelectInterleaved256"));
        assert_eq!(adaptive.count_ones(), 1000);
    }

    #[test]
    fn test_optimization_stats() {
        let sparse_bv = create_sparse_bitvector(10000, 100);
        let adaptive = AdaptiveRankSelect::new(sparse_bv).unwrap();
        let stats = adaptive.optimization_stats();
        assert_eq!(stats.total_bits, 10000);
        assert_eq!(stats.ones_count, 100);
        assert!((stats.density - 0.01).abs() < 0.001);
        assert!(stats.implementation.contains("RankSelectInterleaved256"));
        assert_eq!(stats.estimated_performance_tier, PerformanceTier::HighPerformance);
        let display_str = format!("{}", stats);
        assert!(display_str.contains("AdaptiveRankSelect Stats"));
        assert!(display_str.contains("RankSelectInterleaved256"));
    }

    #[test]
    fn test_multi_dimensional_adaptive() {
        let bv1 = create_balanced_bitvector(1000);
        let bv2 = create_sparse_bitvector(1000, 50);
        let multi = AdaptiveMultiDimensional::new_dual(bv1, bv2).unwrap();
        assert!(multi.implementation_name().contains("RankSelectInterleaved256"));
        assert_eq!(multi.dimensions(), 2);
        assert_eq!(multi.len(), 1000);
        let rank = multi.rank1(100);
        assert!(rank > 0);
    }

    #[test]
    fn test_multi_dimensional_length_mismatch() {
        let bv1 = create_balanced_bitvector(1000);
        let bv2 = create_balanced_bitvector(500);
        let result = AdaptiveMultiDimensional::new_dual(bv1, bv2);
        assert!(result.is_err());
    }

    #[test]
    fn test_data_profile_analysis() {
        let bv = create_sparse_bitvector(50000, 200);
        let criteria = SelectionCriteria::default();
        let profile = AdaptiveRankSelect::analyze_data(&bv, &criteria);
        assert_eq!(profile.total_bits, 50000);
        assert_eq!(profile.ones_count, 250);
        assert!((profile.density - 0.005).abs() < 0.001);
        assert_eq!(profile.size_category, SizeCategory::Medium);
        assert!(matches!(profile.access_pattern, AccessPattern::Mixed | AccessPattern::Sequential | AccessPattern::Random));
    }

    #[test]
    fn test_performance_consistency() {
        // Adaptive wrapper must agree with a directly-built reference.
        let bv = create_balanced_bitvector(10000);
        let adaptive = AdaptiveRankSelect::new(bv.clone()).unwrap();
        let reference = RankSelectInterleaved256::new(bv).unwrap();
        for pos in [0, 1000, 5000, 9999] {
            assert_eq!(adaptive.rank1(pos), reference.rank1(pos));
            assert_eq!(adaptive.rank0(pos), reference.rank0(pos));
        }
        let ones_count = adaptive.count_ones();
        for k in [0, ones_count / 4, ones_count / 2, ones_count - 1] {
            if k < ones_count {
                assert_eq!(adaptive.select1(k).unwrap(), reference.select1(k).unwrap());
            }
        }
    }

    #[test]
    fn test_pattern_analysis() {
        // Regular 4-on/4-off pattern: low complexity, sequential-friendly.
        let mut regular_bv = BitVector::new();
        for i in 0..1000 {
            regular_bv.push(i % 8 < 4).unwrap();
        }
        let criteria = SelectionCriteria::default();
        // Fix: this argument was mis-encoded as `®ular_bv` (`&reg` collapsed
        // into the registered-sign character), which does not compile.
        let profile = AdaptiveRankSelect::analyze_data(&regular_bv, &criteria);
        assert!(profile.pattern_complexity < 0.5);
        assert!(profile.run_length_stats.avg_ones_run > 3.0);
        assert!(profile.run_length_stats.avg_zeros_run > 3.0);
        assert_eq!(profile.access_pattern, AccessPattern::Sequential);
        // Pseudo-random pattern should score higher complexity.
        let mut random_bv = BitVector::new();
        for i in 0..1000 {
            random_bv.push((i * 31 + 17) % 3 == 0).unwrap();
        }
        let random_profile = AdaptiveRankSelect::analyze_data(&random_bv, &criteria);
        assert!(random_profile.pattern_complexity > profile.pattern_complexity);
    }

    #[test]
    fn test_adaptive_thresholds() {
        // 50-bit blocks of ones/zeros: strongly clustered data.
        let mut clustered_bv = BitVector::new();
        for i in 0..1000 {
            clustered_bv.push((i / 50) % 2 == 0).unwrap();
        }
        let criteria = SelectionCriteria {
            enable_adaptive_thresholds: true,
            ..Default::default()
        };
        let profile = AdaptiveRankSelect::analyze_data(&clustered_bv, &criteria);
        let (adaptive_sparse, adaptive_dense) = AdaptiveRankSelect::tune_thresholds(&profile, &criteria);
        assert!(profile.clustering_coefficient > 0.5);
        if profile.hardware_tier >= 3 {
            assert!(adaptive_sparse >= criteria.sparse_threshold);
        }
        println!("Clustering: {:.3}, Original sparse: {:.3}, Adaptive sparse: {:.3}",
            profile.clustering_coefficient, criteria.sparse_threshold, adaptive_sparse);
    }

    #[test]
    fn test_entropy_calculation() {
        let all_ones = BitVector::with_size(1000, true).unwrap();
        let criteria = SelectionCriteria::default();
        let profile_ones = AdaptiveRankSelect::analyze_data(&all_ones, &criteria);
        assert_eq!(profile_ones.entropy, 0.0);
        let balanced = create_balanced_bitvector(1000);
        let profile_balanced = AdaptiveRankSelect::analyze_data(&balanced, &criteria);
        assert!(profile_balanced.entropy > 0.9);
        assert!(profile_balanced.entropy <= 1.0);
    }

    #[test]
    fn test_hardware_aware_selection() {
        let bv = create_balanced_bitvector(50000);
        let hardware_criteria = SelectionCriteria {
            min_hardware_tier: 3, enable_adaptive_thresholds: true,
            ..Default::default()
        };
        let profile = AdaptiveRankSelect::analyze_data(&bv, &hardware_criteria);
        assert!(profile.hardware_tier <= 4);
        println!("Detected hardware tier: {}", profile.hardware_tier);
        let adaptive = AdaptiveRankSelect::with_criteria(bv, hardware_criteria);
        if adaptive.is_ok() {
            let adaptive = adaptive.unwrap();
            println!("Hardware-aware selection: {}", adaptive.implementation_name());
            assert_eq!(adaptive.rank1(1000), 500);
        }
    }

    #[test]
    fn test_complex_pattern_detection() {
        // Period-7 pattern with an irregular 5th slot: mid-range complexity.
        let mut complex_bv = BitVector::new();
        for i in 0..10000 {
            let bit = match i % 7 {
                0 | 1 | 2 => true,
                3 | 4 => false,
                5 => (i / 7) % 3 == 0,
                _ => false,
            };
            complex_bv.push(bit).unwrap();
        }
        let criteria = SelectionCriteria {
            enable_adaptive_thresholds: true,
            pattern_complexity_weight: 0.5,
            clustering_weight: 0.3,
            ..Default::default()
        };
        let profile = AdaptiveRankSelect::analyze_data(&complex_bv, &criteria);
        assert!(profile.pattern_complexity > 0.2);
        assert!(profile.pattern_complexity < 0.8);
        assert!(profile.run_length_stats.alternations > 1000);
        println!("Complex pattern - Complexity: {:.3}, Alternations: {}, Avg run lengths: {:.1}/{:.1}",
            profile.pattern_complexity,
            profile.run_length_stats.alternations,
            profile.run_length_stats.avg_ones_run,
            profile.run_length_stats.avg_zeros_run);
        let adaptive = AdaptiveRankSelect::with_criteria(complex_bv.clone(), criteria).unwrap();
        let interleaved = RankSelectInterleaved256::new(complex_bv).unwrap();
        for pos in [0, 1000, 5000, 9999] {
            assert_eq!(adaptive.rank1(pos), interleaved.rank1(pos));
        }
    }
}