zipora 2.1.4

High-performance Rust implementation providing advanced data structures and compression algorithms with memory safety guarantees. Features LRU page cache, sophisticated caching layer, fiber-based concurrency, real-time compression, secure memory pools, SIMD optimizations, and complete C FFI for migration from C++.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
//! Advanced Rank/Select Variants with High-Performance Optimizations
//!
//! This module provides a comprehensive collection of rank/select implementations
//! based on research from advanced succinct data structure libraries. Each variant
//! is optimized for different use cases and memory/performance trade-offs.
//!
//! # Variants Overview
//!
//! ## Basic Variants
//! - **Removed implementations**: Previous variants have been consolidated for performance
//! - **`RankSelectInterleaved256`**: Cache-optimized interleaved layout
//!
//! ## Sparse Optimizations
//! - **Sparse support**: Available through RankSelectSparse trait implementation
//!
//! ## Multi-Dimensional Variants
//! - **Multi-dimensional**: Available through specialized implementations when needed
//!
//! ## Advanced Optimization Variants
//! - **`RankSelectFragmented`**: Fragment-based compression with adaptive encoding
//! - **`RankSelectHierarchical`**: Multi-level caching with configurable density
//! - **`RankSelectStandard`**: 3-level hierarchy (balanced space/time)
//! - **`RankSelectFast`**: 4-level hierarchy (speed optimized)
//! - **`RankSelectCompact`**: 2-level hierarchy (space optimized)
//! - **`RankSelectBalanced`**: 3-level hierarchy (optimal balance)
//! - **`RankSelectSelectOptimized`**: Dense select caches for frequent select queries
//!
//! ## Adaptive Strategy Selection
//! - **`AdaptiveRankSelect`**: Automatic implementation selection based on data density
//! - **`AdaptiveMultiDimensional`**: Adaptive selection for related bit vectors
//!
//! # Performance Characteristics
//!
//! | Variant | Memory Overhead | Rank Time | Select Time | Best Use Case |
//! |---------|----------------|-----------|-------------|---------------|
//! | Simple | ~25% | O(1) | O(log n) | Testing/reference |
//! | Separated256 | ~25% | O(1) | O(1) | General purpose |
//! | Separated512 | ~25% | O(1) | O(1) | Sequential access |
//! | Interleaved256 | ~25% | O(1) | O(1) | Random access |
//! | Few | <5% | O(log n) | O(1) | Sparse data |
//! | Mixed variants | ~30% | O(1) | O(1) | Multi-dimensional |
//! | **AdaptiveRankSelect** | **Optimal** | **O(1)** | **O(1)** | **Automatic selection** |
//!
//! # Hardware Acceleration
//!
//! All variants include comprehensive hardware acceleration with runtime detection:
//! - **BMI2**: Ultra-fast select using PDEP/PEXT instructions (5-10x speedup)
//! - **BMI1**: Fast bit manipulation with LZCNT/TZCNT/POPCNT
//! - **BZHI**: Zero high bits for efficient range operations
//! - **AVX2**: Vectorized operations for bulk processing
//! - **AVX-512**: Ultra-wide vectorization (8x parallel, nightly Rust)
//! - **ARM NEON**: Cross-platform SIMD support for ARM64
//!
//! # Cache Locality Optimizations
//!
//! Advanced cache optimization features for maximum performance:
//! - **Cache-line alignment**: All major data structures aligned to 64-byte boundaries
//! - **Software prefetching**: Hardware prefetch hints for sequential and strided access patterns
//! - **Hot/cold data separation**: Frequently accessed data prioritized in cache hierarchy
//! - **Access pattern analysis**: Adaptive optimization based on detected usage patterns
//! - **Cache performance monitoring**: Real-time metrics for hit ratios and bandwidth usage
//! - **NUMA awareness**: Memory allocation strategies for multi-socket systems
//!
//! Cache-optimized variants automatically apply these optimizations:
//! - Multi-dimensional operations with cache-optimized rank operations
//! - Cache performance metrics accessible via `.cache_metrics()`
//! - Adaptive prefetching based on access patterns
//!
//! # Fragment-Based Compression
//!
//! Advanced compression techniques for optimal space/time trade-offs:
//! - **Variable-Width Encoding**: Adaptive bit-width per fragment
//! - **Multiple Compression Modes**: Delta, run-length, bit-plane, dictionary
//! - **Hierarchical Compression**: Multi-level indexing
//! - **Cache-Aware Fragments**: 256-bit alignment for SIMD
//!
//! # Multi-Level Hierarchies
//!
//! Configurable cache hierarchies for different workloads:
//! - **2-5 Cache Levels**: From 256-bit blocks to 1M-bit ultra-blocks
//! - **Adaptive Cache Density**: Q parameters for space/time tuning
//! - **Template Specialization**: Compile-time optimization
//! - **Select Cache Optimization**: Dense caches for frequent select operations
//!
//! # Feature Flags
//!
//! - `simd`: Enable SIMD optimizations (default)
//! - `avx512`: Enable AVX-512 optimizations (requires nightly Rust)
//!
//! # Examples
//!
//! ```rust
//! use zipora::succinct::{BitVector, rank_select::{RankSelectOps, RankSelectInterleaved256, AdaptiveRankSelect}};
//!
//! // High-performance general-purpose variant
//! let mut bv = BitVector::new();
//! for i in 0..1000 {
//!     bv.push(i % 3 == 0)?;
//! }
//! let rs = RankSelectInterleaved256::new(bv.clone())?;
//! let rank = rs.rank1(500);
//! let pos = rs.select1(100)?;
//!
//! // Memory-efficient sparse variant
//! let sparse_rs = RankSelectInterleaved256::new(bv.clone())?; // Best performer for all cases
//! let sparse_rank = sparse_rs.rank1(500);
//!
//! // Adaptive selection - automatically chooses optimal implementation
//! let adaptive_rs = AdaptiveRankSelect::new(bv)?;
//! println!("Selected: {}", adaptive_rs.implementation_name());
//! let adaptive_rank = adaptive_rs.rank1(500);
//! # Ok::<(), zipora::error::ZiporaError>(())
//! ```

use crate::error::{Result, ZiporaError};
use crate::succinct::BitVector;
use std::fmt;

// BEST PERFORMER: Use RankSelectInterleaved256 as the primary implementation (121-302 Mops/s)
// This provides 50-150x better performance than the removed legacy implementations
pub use interleaved::{RankSelectInterleaved256 as RankSelect256};

// Core rank/select implementations
pub mod builder;
pub mod config;
pub mod interleaved;    // rank_select_il_256: interleaved 256-bit blocks
pub mod separated;      // rank_select_se_256: side-entry 256-bit blocks
pub mod separated_512;  // rank_select_se_512: side-entry 512-bit blocks
pub mod simple;         // rank_select_simple: minimal baseline
pub mod trivial;        // rank_select_allzero / rank_select_allone
pub mod few;            // rank_select_few: sparse bitvector (stores only pivot positions)
pub mod mixed_il_256;   // rank_select_mixed_il_256: two-dimension interleaved 256-bit
pub mod mixed_se_512;   // rank_select_mixed_se_512: two-dimension side-entry 512-bit
pub mod mixed_xl_256;   // rank_select_mixed_xl_256: multi-dimension (2-4) interleaved 256-bit
pub mod simd;

// Advanced optimization modules
pub mod adaptive;
pub mod bmi2_acceleration;
pub mod bmi2_comprehensive;
pub mod multidim_simd;

// Re-export all rank/select implementations
pub use interleaved::RankSelectInterleaved256;
pub use separated::RankSelectSE256;
pub use separated_512::{RankSelectSE512, RankSelectSE512_32};
pub use simple::RankSelectSimple;
pub use trivial::{RankSelectAllZero, RankSelectAllOne};
pub use few::{RankSelectFewOne, RankSelectFewZero};
pub use mixed_il_256::{RankSelectMixedIL256, MixedDimView};
pub use mixed_se_512::{RankSelectMixedSE512, MixedSE512DimView};
pub use mixed_xl_256::{RankSelectMixedXL256, MixedXL256DimView};
pub use separated_512::RankSelectSE512_64;
pub use config::{
    SeparatedStorageConfig, SeparatedStorageConfigBuilder, StorageLayout, MemoryStrategy,
    CacheAlignment, MultiDimensionalConfig, HardwareOptimizations, PerformanceTuning,
    AccessFrequency, DataDensity, SelectCacheDensity, FeatureDetection, CacheLevel,
    ConfigSummary,
};
pub use simd::{SimdCapabilities, SimdOps, bulk_popcount_simd, bulk_rank1_simd, bulk_select1_simd};
// Note: RankSelectSimple and RankSelectFew implementations removed - using RankSelectInterleaved256 (best performer)

// Re-export advanced optimization variants
pub use adaptive::{
    AdaptiveRankSelect, AdaptiveMultiDimensional, DataProfile, SelectionCriteria,
    AccessPattern, SizeCategory, OptimizationStats, PerformanceTier,
};
pub use bmi2_acceleration::{
    Bmi2Accelerator, Bmi2BitOps, Bmi2BlockOps, Bmi2Capabilities, Bmi2PrefetchOps, Bmi2RangeOps, 
    Bmi2RankOps, Bmi2SelectOps, Bmi2SequenceOps, Bmi2Stats,
};
pub use bmi2_comprehensive::{
    Bmi2BitOps as Bmi2BitOpsComprehensive, Bmi2BlockOps as Bmi2BlockOpsComprehensive,
    Bmi2Capabilities as Bmi2CapabilitiesComprehensive, Bmi2SequenceOps as Bmi2SequenceOpsComprehensive,
    Bmi2Stats as Bmi2StatsComprehensive, OptimizationStrategy, SequenceAnalysis as Bmi2SequenceAnalysis,
};
pub use multidim_simd::MultiDimRankSelect;
// Note: Fragment and hierarchical implementations removed - use adaptive selection instead

/// Common trait for all rank/select operations
///
/// This trait provides a unified interface for rank and select operations
/// across all variants, enabling generic algorithms and benchmarking.
pub trait RankSelectOps {
    /// Count the number of set bits up to (but not including) the given position
    ///
    /// # Arguments
    /// * `pos` - The position to count up to (exclusive)
    ///
    /// # Returns
    /// The number of set bits in the range [0, pos)
    fn rank1(&self, pos: usize) -> usize;

    /// Count the number of clear bits up to (but not including) the given position
    ///
    /// # Arguments
    /// * `pos` - The position to count up to (exclusive)
    ///
    /// # Returns
    /// The number of clear bits in the range [0, pos)
    fn rank0(&self, pos: usize) -> usize;

    /// Find the position of the k-th set bit (0-indexed)
    ///
    /// # Arguments
    /// * `k` - The 0-based index of the set bit to find
    ///
    /// # Returns
    /// The position of the k-th set bit, or an error if k >= total set bits
    fn select1(&self, k: usize) -> Result<usize>;

    /// Find the position of the k-th clear bit (0-indexed)
    ///
    /// # Arguments
    /// * `k` - The 0-based index of the clear bit to find
    ///
    /// # Returns
    /// The position of the k-th clear bit, or an error if k >= total clear bits
    fn select0(&self, k: usize) -> Result<usize>;

    /// Get the length of the underlying bit vector
    fn len(&self) -> usize;

    /// Check if the bit vector is empty
    fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Get the total number of set bits
    fn count_ones(&self) -> usize;

    /// Get the total number of clear bits
    fn count_zeros(&self) -> usize {
        self.len() - self.count_ones()
    }

    /// Get the bit at the specified position
    fn get(&self, index: usize) -> Option<bool>;

    /// Get space overhead as a percentage of the original bit vector
    fn space_overhead_percent(&self) -> f64;
}

/// Extended trait for performance-optimized operations
///
/// This trait provides access to hardware-accelerated implementations
/// and performance monitoring capabilities.
pub trait RankSelectPerformanceOps: RankSelectOps {
    /// Hardware-accelerated rank using POPCNT instruction when available
    fn rank1_hardware_accelerated(&self, pos: usize) -> usize;

    /// Hardware-accelerated select using BMI2 instructions when available
    fn select1_hardware_accelerated(&self, k: usize) -> Result<usize>;

    /// Adaptive rank method - chooses best available implementation
    fn rank1_adaptive(&self, pos: usize) -> usize;

    /// Adaptive select method - chooses best available implementation
    fn select1_adaptive(&self, k: usize) -> Result<usize>;

    /// Bulk rank operations for multiple positions
    fn rank1_bulk(&self, positions: &[usize]) -> Vec<usize>;

    /// Bulk select operations for multiple indices
    fn select1_bulk(&self, indices: &[usize]) -> Result<Vec<usize>>;
}
/// Builder trait for constructing rank/select structures
///
/// This trait provides a uniform interface for building rank/select
/// structures from various sources with optional optimizations.
pub trait RankSelectBuilder<T> {
    /// Build from a bit vector
    fn from_bit_vector(bit_vector: BitVector) -> Result<T>;

    /// Build from an iterator of booleans
    fn from_iter<I>(iter: I) -> Result<T>
    where
        I: IntoIterator<Item = bool>;

    /// Build from a byte slice with bit interpretation
    fn from_bytes(bytes: &[u8], bit_len: usize) -> Result<T>;

    /// Build with specific optimization hints
    fn with_optimizations(bit_vector: BitVector, opts: BuilderOptions) -> Result<T>;
}

/// Options for controlling rank/select construction
#[derive(Debug, Clone)]
pub struct BuilderOptions {
    /// Enable select optimization (build select cache)
    pub optimize_select: bool,
    /// Block size for rank cache (256, 512, 1024, etc.)
    pub block_size: usize,
    /// Select sampling rate (every N set bits)
    pub select_sample_rate: usize,
    /// Enable SIMD optimizations during construction
    pub enable_simd: bool,
    /// Prefer space efficiency over speed
    pub prefer_space: bool,
}

impl Default for BuilderOptions {
    fn default() -> Self {
        Self {
            optimize_select: true,
            block_size: 256,
            select_sample_rate: 512,
            enable_simd: true,
            prefer_space: false,
        }
    }
}

/// Performance statistics for rank/select operations
#[derive(Debug, Clone)]
pub struct PerformanceStats {
    /// Total number of rank operations performed
    pub rank_operations: u64,
    /// Total number of select operations performed
    pub select_operations: u64,
    /// Number of hardware-accelerated operations
    pub hardware_accelerated_ops: u64,
    /// Average rank operation time in nanoseconds
    pub avg_rank_time_ns: f64,
    /// Average select operation time in nanoseconds
    pub avg_select_time_ns: f64,
    /// Cache hit rate for block lookups
    pub cache_hit_rate: f64,
}

impl Default for PerformanceStats {
    fn default() -> Self {
        Self {
            rank_operations: 0,
            select_operations: 0,
            hardware_accelerated_ops: 0,
            avg_rank_time_ns: 0.0,
            avg_select_time_ns: 0.0,
            cache_hit_rate: 1.0,
        }
    }
}

/// Error types specific to rank/select operations
#[derive(Debug, Clone)]
pub enum RankSelectError {
    /// Invalid index for select operation
    InvalidSelectIndex { index: usize, max_valid: usize },
    /// Invalid position for rank operation
    InvalidRankPosition { position: usize, max_valid: usize },
    /// Dimension index out of bounds for multi-dimensional operations
    InvalidDimension { dimension: usize, max_valid: usize },
    /// Sparse data construction failed
    SparseConstructionFailed(String),
    /// SIMD operation not supported on this platform
    SimdNotSupported(String),
    /// Builder configuration error
    BuilderError(String),
}

impl fmt::Display for RankSelectError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            RankSelectError::InvalidSelectIndex { index, max_valid } => {
                write!(
                    f,
                    "Invalid select index {}, maximum valid index is {}",
                    index, max_valid
                )
            }
            RankSelectError::InvalidRankPosition {
                position,
                max_valid,
            } => {
                write!(
                    f,
                    "Invalid rank position {}, maximum valid position is {}",
                    position, max_valid
                )
            }
            RankSelectError::InvalidDimension {
                dimension,
                max_valid,
            } => {
                write!(
                    f,
                    "Invalid dimension {}, maximum valid dimension is {}",
                    dimension, max_valid
                )
            }
            RankSelectError::SparseConstructionFailed(msg) => {
                write!(f, "Sparse rank/select construction failed: {}", msg)
            }
            RankSelectError::SimdNotSupported(msg) => {
                write!(f, "SIMD operation not supported: {}", msg)
            }
            RankSelectError::BuilderError(msg) => {
                write!(f, "Builder error: {}", msg)
            }
        }
    }
}

impl std::error::Error for RankSelectError {}

/// Convert RankSelectError to ZiporaError
impl From<RankSelectError> for ZiporaError {
    fn from(err: RankSelectError) -> Self {
        ZiporaError::invalid_data(err.to_string())
    }
}

/// Utility functions for rank/select operations
pub mod utils {
    use super::*;

    /// Calculate optimal block size based on data characteristics
    pub fn optimal_block_size(total_bits: usize, density: f64) -> usize {
        match (total_bits, density) {
            (_, d) if d < 0.01 => 1024,  // Very sparse - larger blocks
            (_, d) if d > 0.9 => 256,    // Very dense - smaller blocks
            (n, _) if n < 10_000 => 256, // Small data - smaller blocks
            _ => 512,                    // Default medium blocks
        }
    }

    /// Calculate optimal select sampling rate based on data characteristics
    pub fn optimal_select_sample_rate(total_ones: usize) -> usize {
        match total_ones {
            0..=1000 => 64,
            1001..=10_000 => 256,
            10_001..=100_000 => 512,
            _ => 1024,
        }
    }

    /// Estimate memory overhead for a given configuration
    pub fn estimate_memory_overhead(
        total_bits: usize,
        block_size: usize,
        select_sample_rate: usize,
        enable_select_cache: bool,
    ) -> f64 {
        let num_blocks = (total_bits + block_size - 1) / block_size;
        let rank_overhead = num_blocks * 4; // 4 bytes per block

        let select_overhead = if enable_select_cache {
            let estimated_ones = total_bits / 2; // Assume 50% density
            let num_samples = (estimated_ones + select_sample_rate - 1) / select_sample_rate;
            num_samples * 4 // 4 bytes per sample
        } else {
            0
        };

        let total_overhead_bits = (rank_overhead + select_overhead) * 8;
        (total_overhead_bits as f64 / total_bits as f64) * 100.0
    }

    /// Benchmark different rank/select implementations
    pub fn benchmark_variants(
        bit_vector: &BitVector,
        iterations: usize,
    ) -> Vec<(String, f64, f64)> {
        // This would contain actual benchmarking code
        // For now, return placeholder data
        vec![
            ("RankSelectInterleaved256".to_string(), 8.0, 80.0),
            ("AdaptiveRankSelect".to_string(), 10.0, 90.0),
        ]
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    fn create_test_bitvector(size: usize, pattern: fn(usize) -> bool) -> BitVector {
        let mut bv = BitVector::new();
        for i in 0..size {
            bv.push(pattern(i)).unwrap();
        }
        bv
    }

    fn create_alternating_bitvector(size: usize) -> BitVector {
        create_test_bitvector(size, |i| i % 2 == 0)
    }

    fn create_sparse_bitvector(size: usize) -> BitVector {
        create_test_bitvector(size, |i| i % 100 == 0)
    }

    fn create_dense_bitvector(size: usize) -> BitVector {
        create_test_bitvector(size, |i| i % 4 != 3)
    }

    #[test]
    fn test_builder_options_default() {
        let opts = BuilderOptions::default();
        assert!(opts.optimize_select);
        assert_eq!(opts.block_size, 256);
        assert_eq!(opts.select_sample_rate, 512);
        assert!(opts.enable_simd);
        assert!(!opts.prefer_space);
    }

    #[test]
    fn test_performance_stats_default() {
        let stats = PerformanceStats::default();
        assert_eq!(stats.rank_operations, 0);
        assert_eq!(stats.select_operations, 0);
        assert_eq!(stats.hardware_accelerated_ops, 0);
        assert_eq!(stats.avg_rank_time_ns, 0.0);
        assert_eq!(stats.avg_select_time_ns, 0.0);
        assert_eq!(stats.cache_hit_rate, 1.0);
    }

    #[test]
    fn test_utility_functions() {
        // Test optimal block size calculation
        assert_eq!(utils::optimal_block_size(1000, 0.005), 1024); // Very sparse
        assert_eq!(utils::optimal_block_size(1000, 0.95), 256); // Very dense
        assert_eq!(utils::optimal_block_size(5000, 0.5), 256); // Small data
        assert_eq!(utils::optimal_block_size(100_000, 0.5), 512); // Default

        // Test optimal select sample rate
        assert_eq!(utils::optimal_select_sample_rate(500), 64);
        assert_eq!(utils::optimal_select_sample_rate(5000), 256);
        assert_eq!(utils::optimal_select_sample_rate(50_000), 512);
        assert_eq!(utils::optimal_select_sample_rate(500_000), 1024);

        // Test memory overhead estimation
        let overhead = utils::estimate_memory_overhead(10_000, 256, 512, true);
        assert!(overhead > 0.0 && overhead < 50.0); // Reasonable overhead
    }

    #[test]
    fn test_rank_select_error_display() {
        let err = RankSelectError::InvalidSelectIndex {
            index: 10,
            max_valid: 5,
        };
        assert!(err.to_string().contains("Invalid select index 10"));

        let err = RankSelectError::InvalidDimension {
            dimension: 3,
            max_valid: 1,
        };
        assert!(err.to_string().contains("Invalid dimension 3"));

        let err = RankSelectError::SparseConstructionFailed("test error".to_string());
        assert!(
            err.to_string()
                .contains("Sparse rank/select construction failed")
        );
    }

    #[test]
    fn test_rank_select_error_conversion() {
        let rs_err = RankSelectError::BuilderError("test".to_string());
        let zipora_err: ZiporaError = rs_err.into();
        assert!(zipora_err.to_string().contains("Builder error"));
    }

    // Comprehensive tests for RankSelectInterleaved256 (best performer)
    #[test]
    fn test_interleaved256_comprehensive_operations() {
        let bv = create_alternating_bitvector(1000);

        // Test our best-performing variant
        let interleaved = RankSelectInterleaved256::new(bv.clone()).unwrap();

        // Test basic rank operations at various positions
        for pos in [0, 1, 100, 500, 999] {
            let rank = interleaved.rank1(pos);

            // For alternating pattern, rank should be approximately pos/2
            let expected_approx = pos / 2;
            assert!(
                rank >= expected_approx && rank <= expected_approx + 1,
                "Rank at position {} should be approximately {}, got {}",
                pos, expected_approx, rank
            );
        }

        // Test select operations
        let ones_count = interleaved.count_ones();
        for k in [
            0,
            ones_count / 4,
            ones_count / 2,
            ones_count * 3 / 4,
            ones_count - 1,
        ] {
            if k < ones_count {
                let select_pos = interleaved.select1(k).unwrap();

                // Verify round-trip: rank(select(k)) should be k+1
                assert_eq!(
                    interleaved.rank1(select_pos + 1),
                    k + 1,
                    "Round-trip failed: rank(select({}) + 1) = {} != {}",
                    k, interleaved.rank1(select_pos + 1), k + 1
                );
            }
        }

        // Test 0-operations as well
        for pos in [0, 1, 100, 500, 999] {
            let rank0 = interleaved.rank0(pos);
            let rank1 = interleaved.rank1(pos);

            // rank0 + rank1 should equal position (counting bits before position)
            assert_eq!(
                rank0 + rank1,
                pos,
                "rank0({}) + rank1({}) = {} + {} = {} != {}",
                pos, pos, rank0, rank1, rank0 + rank1, pos
            );
        }
    }

    #[test]
    fn test_sparse_data_performance() {
        let sparse_bv = create_sparse_bitvector(10000);

        // Test RankSelectInterleaved256 performance on sparse data
        let sparse_rs = RankSelectInterleaved256::new(sparse_bv.clone()).unwrap();

        // Test basic operations on sparse data
        assert_eq!(sparse_rs.rank1(0), 0);
        assert_eq!(sparse_rs.rank1(100), 1);
        assert_eq!(sparse_rs.rank1(200), 2);

        if sparse_rs.count_ones() > 0 {
            assert_eq!(sparse_rs.select1(0).unwrap(), 0);
        }

        // Verify correctness on sparse patterns
        let ones_count = sparse_rs.count_ones();
        for i in 0..std::cmp::min(ones_count, 10) {
            let select_pos = sparse_rs.select1(i).unwrap();
            assert_eq!(
                sparse_rs.rank1(select_pos + 1),
                i + 1,
                "Sparse data rank/select consistency failed"
            );
        }
    }

    #[test]
    fn test_different_data_patterns() {
        // Test RankSelectInterleaved256 on different data patterns
        let alternating_bv = create_alternating_bitvector(1000);
        let dense_bv = create_dense_bitvector(1000);

        let alt_rs = RankSelectInterleaved256::new(alternating_bv.clone()).unwrap();
        let dense_rs = RankSelectInterleaved256::new(dense_bv.clone()).unwrap();

        // Test operations on alternating pattern
        for pos in [0, 100, 500, 999] {
            let alt_rank = alt_rs.rank1(pos);
            let dense_rank = dense_rs.rank1(pos);

            // For alternating pattern, rank should be ~pos/2
            let expected_alt = pos / 2;
            assert!(
                alt_rank >= expected_alt && alt_rank <= expected_alt + 1,
                "Alternating pattern rank unexpected: {} at pos {}",
                alt_rank, pos
            );

            // For dense pattern, rank should be ~pos (most bits are 1)
            assert!(
                dense_rank >= pos * 3 / 4,  // At least 75% should be 1s
                "Dense pattern rank too low: {} at pos {}",
                dense_rank, pos
            );
        }

        // Test consistency across patterns
        let alt_ones = alt_rs.count_ones();
        let dense_ones = dense_rs.count_ones();

        assert!(dense_ones > alt_ones, "Dense pattern should have more 1s");
    }

    #[test]
    fn test_large_scale_operations() {
        // Test RankSelectInterleaved256 on larger data sets
        let bv1 = create_alternating_bitvector(2000);
        let bv2 = create_sparse_bitvector(2000);
        let bv3 = create_dense_bitvector(2000);

        let rs1 = RankSelectInterleaved256::new(bv1.clone()).unwrap();
        let rs2 = RankSelectInterleaved256::new(bv2.clone()).unwrap();
        let rs3 = RankSelectInterleaved256::new(bv3.clone()).unwrap();

        // Test operations across different patterns
        for pos in [0, 500, 1000, 1500, 1999] {
            let rank1 = rs1.rank1(pos);
            let rank2 = rs2.rank1(pos);
            let rank3 = rs3.rank1(pos);

            // Verify rank ordering: sparse < alternating < dense
            assert!(rank2 <= rank1, "Sparse should have fewer 1s than alternating");
            assert!(rank1 <= rank3, "Alternating should have fewer 1s than dense");

            // Test rank0 + rank1 = pos for all patterns
            assert_eq!(rs1.rank0(pos) + rank1, pos);
            assert_eq!(rs2.rank0(pos) + rank2, pos);
            assert_eq!(rs3.rank0(pos) + rank3, pos);
        }

        // Test select operations consistency
        let ones1 = rs1.count_ones();
        let ones2 = rs2.count_ones();
        let ones3 = rs3.count_ones();

        for rs in [&rs1, &rs2, &rs3] {
            let ones = rs.count_ones();
            if ones > 0 {
                for k in 0..std::cmp::min(10, ones) {
                    let select_pos = rs.select1(k).unwrap();
                    assert_eq!(rs.rank1(select_pos + 1), k + 1);
                }
            }
        }
    }

    #[test]
    fn test_simd_operations() {
        let bit_data: Vec<u64> = vec![
            0xAAAAAAAAAAAAAAAAu64, // Alternating bits
            0x5555555555555555u64, // Alternating bits (offset)
            0xFFFFFFFFFFFFFFFFu64, // All ones
            0x0000000000000000u64, // All zeros
        ];

        let positions = vec![0, 64, 128, 192, 256];
        let indices = vec![0, 10, 20, 30];

        // Test bulk operations
        let ranks = bulk_rank1_simd(&bit_data, &positions);
        assert_eq!(ranks.len(), positions.len());

        let selects = bulk_select1_simd(&bit_data, &indices);
        assert!(selects.is_ok());

        let popcounts = bulk_popcount_simd(&bit_data);
        assert_eq!(popcounts.len(), bit_data.len());
        assert_eq!(popcounts[0], 32); // Alternating pattern
        assert_eq!(popcounts[1], 32); // Alternating pattern
        assert_eq!(popcounts[2], 64); // All ones
        assert_eq!(popcounts[3], 0); // All zeros
    }

    #[test]
    fn test_simd_capabilities() {
        let caps = SimdCapabilities::detect();

        // Should detect some capability
        assert!(caps.optimization_tier <= 5);
        assert!(caps.chunk_size > 0);
        assert!(caps.chunk_size <= 64 * 1024);

        // Test cached access
        let caps2 = SimdCapabilities::get();
        assert_eq!(caps.optimization_tier, caps2.optimization_tier);
    }

    #[test]
    fn test_performance_ops_trait() {
        let bv = create_alternating_bitvector(1000);
        let interleaved = RankSelectInterleaved256::new(bv).unwrap();

        // Test performance operations
        for pos in [0, 100, 500, 999] {
            let standard_rank = interleaved.rank1(pos);
            let hw_rank = interleaved.rank1_hardware_accelerated(pos);
            let adaptive_rank = interleaved.rank1_adaptive(pos);

            assert_eq!(standard_rank, hw_rank, "Hardware rank mismatch");
            assert_eq!(standard_rank, adaptive_rank, "Adaptive rank mismatch");
        }

        // Test bulk operations
        let positions = vec![0, 100, 500, 999];
        let bulk_ranks = interleaved.rank1_bulk(&positions);

        for (i, &pos) in positions.iter().enumerate() {
            assert_eq!(bulk_ranks[i], interleaved.rank1(pos), "Bulk rank mismatch");
        }

        let ones_count = interleaved.count_ones();
        if ones_count > 0 {
            let indices = vec![0, ones_count / 4, ones_count / 2];
            let bulk_selects = interleaved.select1_bulk(&indices).unwrap();

            for (i, &k) in indices.iter().enumerate() {
                if k < ones_count {
                    assert_eq!(
                        bulk_selects[i],
                        interleaved.select1(k).unwrap(),
                        "Bulk select mismatch"
                    );
                }
            }
        }
    }

    #[test]
    fn test_builder_patterns() {
        let bv = create_alternating_bitvector(1000);

        // Test builder with options
        let opts = BuilderOptions {
            optimize_select: true,
            block_size: 512,
            select_sample_rate: 256,
            enable_simd: true,
            prefer_space: false,
        };

        let interleaved = RankSelectInterleaved256::with_optimizations(bv.clone(), opts).unwrap();

        // Should work correctly with custom options
        assert!(interleaved.len() > 0);
        assert_eq!(interleaved.rank1(0), 0);
        assert_eq!(interleaved.rank1(1), 1);

        // Test with sparse data
        let sparse_bv = create_sparse_bitvector(1000);
        let sparse_rs = RankSelectInterleaved256::new(sparse_bv).unwrap();
        // RankSelectInterleaved256 handles sparse data efficiently but uses more memory for cache optimization
        assert!(sparse_rs.space_overhead_percent() < 250.0);
    }

    #[test]
    fn test_space_overhead() {
        let bv = create_alternating_bitvector(10000);

        let alternating_rs = RankSelectInterleaved256::new(bv.clone()).unwrap();
        let sparse_bv = create_sparse_bitvector(10000);
        let sparse_rs = RankSelectInterleaved256::new(sparse_bv).unwrap();
        let dense_bv = create_dense_bitvector(10000);
        let dense_rs = RankSelectInterleaved256::new(dense_bv).unwrap();

        // Check space overhead is reasonable for all patterns
        // RankSelectInterleaved256 uses more memory for cache optimization and performance
        assert!(
            alternating_rs.space_overhead_percent() < 250.0,
            "Alternating pattern overhead too high: {:.2}%",
            alternating_rs.space_overhead_percent()
        );
        assert!(
            sparse_rs.space_overhead_percent() < 250.0,
            "Sparse pattern overhead too high: {:.2}%",
            sparse_rs.space_overhead_percent()
        );
        assert!(
            dense_rs.space_overhead_percent() < 250.0,
            "Dense pattern overhead too high: {:.2}%",
            dense_rs.space_overhead_percent()
        );

        println!("Alternating overhead: {:.2}%", alternating_rs.space_overhead_percent());
        println!("Sparse overhead: {:.2}%", sparse_rs.space_overhead_percent());
        println!("Dense overhead: {:.2}%", dense_rs.space_overhead_percent());
    }

    #[test]
    fn test_large_dataset_consistency() {
        // Test with larger dataset to verify scalability
        let large_bv = create_test_bitvector(100000, |i| (i * 13 + 7) % 71 == 0);

        let interleaved = RankSelectInterleaved256::new(large_bv.clone()).unwrap();

        // Test operations at various scales
        let test_positions = [0, 1000, 10000, 50000, 99999];
        for &pos in &test_positions {
            let rank = interleaved.rank1(pos);
            let rank0 = interleaved.rank0(pos);

            // Verify rank invariant
            assert_eq!(rank + rank0, pos, "Rank invariant failed at {}", pos);

            // Test that rank is monotonic
            if pos > 0 {
                let prev_rank = interleaved.rank1(pos - 1);
                assert!(rank >= prev_rank, "Rank should be monotonic");
            }
        }

        let ones_count = interleaved.count_ones();
        let test_ks = [0, ones_count / 10, ones_count / 2, ones_count * 9 / 10];
        for &k in &test_ks {
            if k < ones_count {
                let select_pos = interleaved.select1(k).unwrap();
                // Verify round-trip property: rank(select(k) + 1) should be k + 1
                assert_eq!(
                    interleaved.rank1(select_pos + 1),
                    k + 1,
                    "Round-trip failed: rank(select({}) + 1) != {}",
                    k, k + 1
                );
            }
        }

        println!("Large dataset ({} bits) test passed - {} ones found",
                 large_bv.len(), ones_count);
    }

    #[test]
    fn test_edge_cases() {
        // Empty bit vector
        let empty_bv = BitVector::new();
        let empty_rs = RankSelectInterleaved256::new(empty_bv).unwrap();
        assert_eq!(empty_rs.len(), 0);
        assert_eq!(empty_rs.count_ones(), 0);
        assert_eq!(empty_rs.rank1(0), 0);

        // Single bit
        let mut single_bv = BitVector::new();
        single_bv.push(true).unwrap();
        let single_rs = RankSelectInterleaved256::new(single_bv).unwrap();
        assert_eq!(single_rs.len(), 1);
        assert_eq!(single_rs.count_ones(), 1);
        assert_eq!(single_rs.rank1(0), 0);
        assert_eq!(single_rs.rank1(1), 1);
        assert_eq!(single_rs.select1(0).unwrap(), 0);

        // All zeros
        let all_zeros = BitVector::with_size(1000, false).unwrap();
        let zeros_rs = RankSelectInterleaved256::new(all_zeros).unwrap();
        assert_eq!(zeros_rs.count_ones(), 0);
        assert_eq!(zeros_rs.rank1(500), 0);
        assert!(zeros_rs.select1(0).is_err());

        // All ones
        let all_ones = BitVector::with_size(1000, true).unwrap();
        let ones_rs = RankSelectInterleaved256::new(all_ones).unwrap();
        assert_eq!(ones_rs.count_ones(), 1000);
        assert_eq!(ones_rs.rank1(500), 500);
        assert_eq!(ones_rs.select1(499).unwrap(), 499);
    }
}