use crate::error::{Result, ZiporaError};
use crate::system::{CpuFeatures, get_cpu_features};
use crate::succinct::rank_select::bmi2_acceleration::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
#[derive(Debug, Clone)]
pub struct BitOpsConfig {
pub enable_bmi2: bool,
pub enable_avx2: bool,
pub enable_popcnt: bool,
pub software_fallback: bool,
pub enable_compression_optimizations: bool,
pub enable_entropy_acceleration: bool,
pub enable_variable_length_decoding: bool,
}
impl Default for BitOpsConfig {
fn default() -> Self {
let features = get_cpu_features();
Self {
enable_bmi2: features.has_bmi2,
enable_avx2: features.has_avx2,
enable_popcnt: features.has_popcnt,
software_fallback: true,
enable_compression_optimizations: true,
enable_entropy_acceleration: true,
enable_variable_length_decoding: true,
}
}
}
pub struct BitOps {
config: BitOpsConfig,
features: &'static CpuFeatures,
}
impl BitOps {
pub fn new() -> Self {
Self::with_config(BitOpsConfig::default())
}
pub fn with_config(config: BitOpsConfig) -> Self {
let features = get_cpu_features();
Self { config, features }
}
#[inline]
pub fn popcount32(&self, x: u32) -> u32 {
if self.config.enable_popcnt && self.features.has_popcnt {
#[cfg(target_arch = "x86_64")]
unsafe {
_popcnt32(x as i32) as u32
}
#[cfg(not(target_arch = "x86_64"))]
x.count_ones()
} else if self.config.software_fallback {
self.popcount32_software(x)
} else {
x.count_ones()
}
}
#[inline]
pub fn popcount64(&self, x: u64) -> u32 {
if self.config.enable_popcnt && self.features.has_popcnt {
#[cfg(target_arch = "x86_64")]
unsafe {
_popcnt64(x as i64) as u32
}
#[cfg(not(target_arch = "x86_64"))]
x.count_ones()
} else if self.config.software_fallback {
self.popcount64_software(x)
} else {
x.count_ones()
}
}
#[inline]
pub fn trailing_zeros32(&self, x: u32) -> u32 {
if x == 0 {
return 32;
}
#[cfg(target_arch = "x86_64")]
if self.config.enable_bmi2 && self.features.has_bmi2 {
unsafe {
_tzcnt_u32(x)
}
} else {
x.trailing_zeros()
}
#[cfg(not(target_arch = "x86_64"))]
x.trailing_zeros()
}
#[inline]
pub fn trailing_zeros64(&self, x: u64) -> u32 {
if x == 0 {
return 64;
}
#[cfg(target_arch = "x86_64")]
if self.config.enable_bmi2 && self.features.has_bmi2 {
unsafe {
_tzcnt_u64(x) as u32
}
} else {
x.trailing_zeros()
}
#[cfg(not(target_arch = "x86_64"))]
x.trailing_zeros()
}
#[inline]
pub fn parallel_deposit32(&self, source: u32, mask: u32) -> u32 {
#[cfg(target_arch = "x86_64")]
if self.config.enable_bmi2 && self.features.has_bmi2 {
unsafe {
_pdep_u32(source, mask)
}
} else if self.config.software_fallback {
self.pdep32_software(source, mask)
} else {
source & mask
}
#[cfg(not(target_arch = "x86_64"))]
if self.config.software_fallback {
self.pdep32_software(source, mask)
} else {
source & mask
}
}
#[inline]
pub fn parallel_deposit64(&self, source: u64, mask: u64) -> u64 {
#[cfg(target_arch = "x86_64")]
if self.config.enable_bmi2 && self.features.has_bmi2 {
unsafe {
_pdep_u64(source, mask)
}
} else if self.config.software_fallback {
self.pdep64_software(source, mask)
} else {
source & mask
}
#[cfg(not(target_arch = "x86_64"))]
if self.config.software_fallback {
self.pdep64_software(source, mask)
} else {
source & mask
}
}
#[inline]
pub fn parallel_extract32(&self, source: u32, mask: u32) -> u32 {
#[cfg(target_arch = "x86_64")]
if self.config.enable_bmi2 && self.features.has_bmi2 {
unsafe {
_pext_u32(source, mask)
}
} else if self.config.software_fallback {
self.pext32_software(source, mask)
} else {
source & mask
}
#[cfg(not(target_arch = "x86_64"))]
if self.config.software_fallback {
self.pext32_software(source, mask)
} else {
source & mask
}
}
#[inline]
pub fn parallel_extract64(&self, source: u64, mask: u64) -> u64 {
#[cfg(target_arch = "x86_64")]
if self.config.enable_bmi2 && self.features.has_bmi2 {
unsafe {
_pext_u64(source, mask)
}
} else if self.config.software_fallback {
self.pext64_software(source, mask)
} else {
source & mask
}
#[cfg(not(target_arch = "x86_64"))]
if self.config.software_fallback {
self.pext64_software(source, mask)
} else {
source & mask
}
}
#[inline]
pub fn zero_high_bits32(&self, source: u32, index: u32) -> u32 {
#[cfg(target_arch = "x86_64")]
if self.config.enable_bmi2 && self.features.has_bmi2 {
unsafe {
_bzhi_u32(source, index)
}
} else if self.config.software_fallback {
if index >= 32 {
source
} else {
source & ((1u32 << index) - 1)
}
} else {
source
}
#[cfg(not(target_arch = "x86_64"))]
if self.config.software_fallback {
if index >= 32 {
source
} else {
source & ((1u32 << index) - 1)
}
} else {
source
}
}
#[inline]
pub fn zero_high_bits64(&self, source: u64, index: u32) -> u64 {
#[cfg(target_arch = "x86_64")]
if self.config.enable_bmi2 && self.features.has_bmi2 {
unsafe {
_bzhi_u64(source, index)
}
} else if self.config.software_fallback {
if index >= 64 {
source
} else {
source & ((1u64 << index) - 1)
}
} else {
source
}
#[cfg(not(target_arch = "x86_64"))]
if self.config.software_fallback {
if index >= 64 {
source
} else {
source & ((1u64 << index) - 1)
}
} else {
source
}
}
#[inline]
pub fn select_bit32(&self, x: u32, k: u32) -> Option<u32> {
if x == 0 || k >= self.popcount32(x) {
return None;
}
#[cfg(target_arch = "x86_64")]
if self.config.enable_bmi2 && self.features.has_bmi2 {
unsafe {
let deposited = _pdep_u32(1u32 << k, x);
Some(_tzcnt_u32(deposited))
}
} else if self.config.software_fallback {
self.select_bit32_software(x, k)
} else {
None
}
#[cfg(not(target_arch = "x86_64"))]
if self.config.software_fallback {
self.select_bit32_software(x, k)
} else {
None
}
}
#[inline]
pub fn select_bit64(&self, x: u64, k: u32) -> Option<u32> {
if x == 0 || k >= self.popcount64(x) {
return None;
}
#[cfg(target_arch = "x86_64")]
if self.config.enable_bmi2 && self.features.has_bmi2 {
unsafe {
let deposited = _pdep_u64(1u64 << k, x);
Some(_tzcnt_u64(deposited) as u32)
}
} else if self.config.software_fallback {
self.select_bit64_software(x, k)
} else {
None
}
#[cfg(not(target_arch = "x86_64"))]
if self.config.software_fallback {
self.select_bit64_software(x, k)
} else {
None
}
}
#[inline]
pub fn vectorized_popcount(&self, data: &[u64]) -> Vec<u32> {
#[cfg(target_arch = "x86_64")]
if self.config.enable_avx2 && self.features.has_avx2 && data.len() >= 4 {
return self.vectorized_popcount_avx2(data);
}
#[cfg(not(target_arch = "x86_64"))]
{}
data.iter().map(|&x| self.popcount64(x)).collect()
}
pub fn config(&self) -> &BitOpsConfig {
&self.config
}
pub fn features(&self) -> &CpuFeatures {
&self.features
}
#[inline]
pub fn has_bmi2(&self) -> bool {
self.config.enable_bmi2 && self.features.has_bmi2
}
#[inline]
pub fn bit_reverse_bmi2(&self, x: u64) -> u64 {
if self.config.enable_bmi2 && self.features.has_bmi2 {
#[cfg(target_arch = "x86_64")]
{
return Bmi2BitOps::deposit_bits(x.reverse_bits(), u64::MAX);
}
}
x.reverse_bits()
}
#[inline]
pub fn parallel_bit_extract_bmi2(&self, source: u64, field_masks: &[u64]) -> Vec<u64> {
if self.config.enable_bmi2 && self.config.enable_compression_optimizations && self.features.has_bmi2 {
return Bmi2AdvancedPatterns::pext_parallel_extract(&[source], field_masks[0])
.into_iter()
.chain(
field_masks[1..].iter()
.map(|&mask| Bmi2BitOps::extract_bits(source, mask))
)
.collect();
}
field_masks.iter()
.map(|&mask| self.parallel_extract64(source, mask))
.collect()
}
#[inline]
pub fn bit_interleaving_bmi2(&self, low_bits: u32, high_bits: u32) -> u64 {
if self.config.enable_bmi2 && self.features.has_bmi2 {
#[cfg(target_arch = "x86_64")]
{
let even_mask = 0x5555555555555555u64;
let odd_mask = 0xAAAAAAAAAAAAAAAAu64;
let low_deposited = Bmi2BitOps::deposit_bits(low_bits as u64, even_mask);
let high_deposited = Bmi2BitOps::deposit_bits(high_bits as u64, odd_mask);
return low_deposited | high_deposited;
}
}
let mut result = 0u64;
for i in 0..32 {
if low_bits & (1u32 << i) != 0 {
result |= 1u64 << (i * 2);
}
if high_bits & (1u32 << i) != 0 {
result |= 1u64 << (i * 2 + 1);
}
}
result
}
#[inline]
pub fn pdep_u64(&self, x: u64, mask: u64) -> u64 {
self.parallel_deposit64(x, mask)
}
#[inline]
pub fn pext_u64(&self, x: u64, mask: u64) -> u64 {
self.parallel_extract64(x, mask)
}
#[inline]
pub fn reverse_bits32(&self, x: u32) -> u32 {
if self.config.enable_bmi2 && self.features.has_bmi2 {
#[cfg(target_arch = "x86_64")]
unsafe {
return Bmi2BitOps::deposit_bits(x.reverse_bits() as u64, u32::MAX as u64) as u32;
}
}
x.reverse_bits()
}
#[inline]
pub fn decode_variable_length_bmi2(&self, bit_stream: u64, start_bit: u32, length: u32) -> Result<u32> {
if length == 0 || length > 32 {
return Err(ZiporaError::invalid_data("Invalid variable length field"));
}
if self.config.enable_bmi2 && self.config.enable_variable_length_decoding && self.features.has_bmi2 {
#[cfg(target_arch = "x86_64")]
{
let extracted = Bmi2CompressionOps::extract_entropy_field(bit_stream, start_bit, length);
return Ok(extracted);
}
}
if start_bit + length > 64 {
return Err(ZiporaError::invalid_data("Field extends beyond bit stream"));
}
let shifted = bit_stream >> start_bit;
let mask = if length == 32 { u32::MAX } else { (1u32 << length) - 1 };
Ok((shifted as u32) & mask)
}
#[inline]
pub fn encode_variable_length_bmi2(&self, value: u32, length: u32) -> Result<u64> {
if length == 0 || length > 32 {
return Err(ZiporaError::invalid_data("Invalid encoding length"));
}
if self.config.enable_bmi2 && self.config.enable_variable_length_decoding && self.features.has_bmi2 {
#[cfg(target_arch = "x86_64")]
{
let mask = if length == 32 { u64::MAX } else { (1u64 << length) - 1 };
return Ok(Bmi2BitOps::deposit_bits(value as u64, mask));
}
}
let mask = if length == 32 { u32::MAX } else { (1u32 << length) - 1 };
Ok((value & mask) as u64)
}
#[inline]
pub fn reverse_bits64(&self, x: u64) -> u64 {
if self.config.enable_bmi2 && self.features.has_bmi2 {
#[cfg(target_arch = "x86_64")]
{
return Bmi2BitOps::deposit_bits(x.reverse_bits(), u64::MAX);
}
}
x.reverse_bits()
}
#[inline]
pub fn extract_huffman_symbols_bmi2(&self, packed_symbols: u64, symbol_masks: &[u64]) -> Vec<u32> {
if self.config.enable_bmi2 && self.config.enable_entropy_acceleration && self.features.has_bmi2 {
return Bmi2CompressionOps::decode_variable_length(packed_symbols, symbol_masks);
}
symbol_masks.iter()
.map(|&mask| self.parallel_extract64(packed_symbols, mask) as u32)
.collect()
}
#[inline]
pub fn decode_rans_symbols_bmi2(&self, state: u64, frequency_mask: u64) -> u32 {
if self.config.enable_bmi2 && self.config.enable_entropy_acceleration && self.features.has_bmi2 {
return Bmi2BitOps::extract_bits(state, frequency_mask) as u32;
}
self.parallel_extract64(state, frequency_mask) as u32
}
#[inline]
pub fn fse_decode_bmi2(&self, state: u64, table_mask: u64, symbol_offset: u32) -> u32 {
if self.config.enable_bmi2 && self.config.enable_entropy_acceleration && self.features.has_bmi2 {
let extracted = Bmi2BitOps::extract_bits(state, table_mask) as u32;
return extracted.wrapping_add(symbol_offset);
}
let extracted = self.parallel_extract64(state, table_mask) as u32;
extracted.wrapping_add(symbol_offset)
}
#[inline]
fn popcount32_software(&self, mut x: u32) -> u32 {
x = x - ((x >> 1) & 0x55555555);
x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
((x + (x >> 4)) & 0x0F0F0F0F).wrapping_mul(0x01010101) >> 24
}
#[inline]
fn popcount64_software(&self, mut x: u64) -> u32 {
x = x - ((x >> 1) & 0x5555555555555555);
x = (x & 0x3333333333333333) + ((x >> 2) & 0x3333333333333333);
x = (x + (x >> 4)) & 0x0F0F0F0F0F0F0F0F;
((x.wrapping_mul(0x0101010101010101)) >> 56) as u32
}
#[inline]
fn pdep32_software(&self, source: u32, mask: u32) -> u32 {
let mut result = 0u32;
let mut source = source;
let mut mask = mask;
while mask != 0 {
if source & 1 != 0 {
result |= mask & (!mask + 1); }
source >>= 1;
mask &= mask - 1; }
result
}
#[inline]
fn pdep64_software(&self, source: u64, mask: u64) -> u64 {
let mut result = 0u64;
let mut source = source;
let mut mask = mask;
while mask != 0 {
if source & 1 != 0 {
result |= mask & (!mask + 1); }
source >>= 1;
mask &= mask - 1; }
result
}
#[inline]
fn pext32_software(&self, source: u32, mask: u32) -> u32 {
let mut result = 0u32;
let mut bit_idx = 0;
let mut mask = mask;
while mask != 0 {
if source & mask & (!mask + 1) != 0 {
result |= 1u32 << bit_idx;
}
bit_idx += 1;
mask &= mask - 1; }
result
}
#[inline]
fn pext64_software(&self, source: u64, mask: u64) -> u64 {
let mut result = 0u64;
let mut bit_idx = 0;
let mut mask = mask;
while mask != 0 {
if source & mask & (!mask + 1) != 0 {
result |= 1u64 << bit_idx;
}
bit_idx += 1;
mask &= mask - 1; }
result
}
fn select_bit32_software(&self, mut x: u32, k: u32) -> Option<u32> {
let mut count = 0;
for i in 0..32 {
if x & 1 != 0 {
if count == k {
return Some(i);
}
count += 1;
}
x >>= 1;
}
None
}
fn select_bit64_software(&self, mut x: u64, k: u32) -> Option<u32> {
let mut count = 0;
for i in 0..64 {
if x & 1 != 0 {
if count == k {
return Some(i);
}
count += 1;
}
x >>= 1;
}
None
}
#[cfg(target_arch = "x86_64")]
fn vectorized_popcount_avx2(&self, data: &[u64]) -> Vec<u32> {
let mut result = Vec::with_capacity(data.len());
let chunks = data.chunks_exact(4);
let remainder = chunks.remainder();
unsafe {
for chunk in chunks {
let v = _mm256_loadu_si256(chunk.as_ptr() as *const __m256i);
let mask0f = _mm256_set1_epi8(0x0f);
let lookup = _mm256_setr_epi8(
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4
);
let lo = _mm256_and_si256(v, mask0f);
let hi = _mm256_and_si256(_mm256_srli_epi32(v, 4), mask0f);
let popcnt_lo = _mm256_shuffle_epi8(lookup, lo);
let popcnt_hi = _mm256_shuffle_epi8(lookup, hi);
let sum = _mm256_add_epi8(popcnt_lo, popcnt_hi);
let sum32 = _mm256_sad_epu8(sum, _mm256_setzero_si256());
let results = [
_mm256_extract_epi64(sum32, 0) as u32,
_mm256_extract_epi64(sum32, 1) as u32,
_mm256_extract_epi64(sum32, 2) as u32,
_mm256_extract_epi64(sum32, 3) as u32,
];
result.extend_from_slice(&results);
}
}
for &value in remainder {
result.push(self.popcount64(value));
}
result
}
}
impl Default for BitOps {
fn default() -> Self {
Self::new()
}
}
pub struct EntropyBitOps {
bit_ops: BitOps,
}
impl EntropyBitOps {
pub fn new() -> Self {
Self {
bit_ops: BitOps::new(),
}
}
pub fn with_config(config: BitOpsConfig) -> Self {
Self {
bit_ops: BitOps::with_config(config),
}
}
#[inline]
pub fn extract_bits(&self, stream: u64, offset: u32, width: u32) -> u32 {
if width == 0 || width > 32 {
return 0;
}
if offset == 0 {
return 0;
}
let stream16 = stream as u16;
if offset + width > 16 {
return 0; }
let shift_amount = 16 - offset - width;
let shifted = stream16 >> shift_amount;
self.bit_ops.zero_high_bits32(shifted as u32, width)
}
#[inline]
pub fn pack_bits(&self, stream: &mut u64, value: u32, offset: u32, width: u32) -> Result<()> {
if width > 32 || offset > 64 {
return Err(ZiporaError::invalid_data("Invalid bit packing parameters"));
}
let mask = if width == 32 {
0xFFFFFFFF
} else {
(1u64 << width) - 1
};
let masked_value = (value as u64) & mask;
let shift_amount = 64 - offset - width;
if shift_amount < 64 {
*stream |= masked_value << shift_amount;
}
Ok(())
}
#[inline]
pub fn reverse_bits32(&self, mut x: u32) -> u32 {
if self.bit_ops.config.enable_bmi2 && self.bit_ops.features.has_bmi2 {
#[cfg(target_arch = "x86_64")]
unsafe {
let mask = 0x55555555u32; let even = _pext_u32(x, mask);
let odd = _pext_u32(x, !mask);
_pdep_u32(even.reverse_bits() >> 16, !mask) |
_pdep_u32(odd.reverse_bits() >> 16, mask)
}
#[cfg(not(target_arch = "x86_64"))]
x.reverse_bits()
} else {
x.reverse_bits()
}
}
pub fn bit_ops(&self) -> &BitOps {
&self.bit_ops
}
}
impl Default for EntropyBitOps {
fn default() -> Self {
Self::new()
}
}
#[derive(Debug, Clone, Default)]
pub struct BitOpsStats {
pub hardware_ops: u64,
pub software_ops: u64,
pub total_ops: u64,
}
impl BitOpsStats {
pub fn hardware_percentage(&self) -> f64 {
if self.total_ops == 0 {
0.0
} else {
(self.hardware_ops as f64 / self.total_ops as f64) * 100.0
}
}
}
pub struct CompressionBmi2Dispatcher {
capabilities: Bmi2Capabilities,
config: BitOpsConfig,
}
impl CompressionBmi2Dispatcher {
pub fn new() -> Self {
Self {
capabilities: Bmi2Capabilities::detect(),
config: BitOpsConfig::default(),
}
}
pub fn with_config(config: BitOpsConfig) -> Self {
Self {
capabilities: Bmi2Capabilities::detect(),
config,
}
}
pub fn dispatch_entropy_extract(&self, bit_stream: u64, start_bit: u32, field_length: u32) -> u32 {
if self.capabilities.has_bmi2 && self.config.enable_entropy_acceleration {
Bmi2CompressionOps::extract_entropy_field(bit_stream, start_bit, field_length)
} else {
let shifted = bit_stream >> start_bit;
let mask = if field_length >= 32 { u32::MAX } else { (1u32 << field_length) - 1 };
(shifted as u32) & mask
}
}
pub fn dispatch_variable_length_decode(&self, packed_data: u64, symbol_masks: &[u64]) -> Vec<u32> {
if self.capabilities.has_bmi2 && self.config.enable_variable_length_decoding {
Bmi2CompressionOps::decode_variable_length(packed_data, symbol_masks)
} else {
symbol_masks.iter()
.map(|&mask| {
let mut result = 0u32;
let mut bit_idx = 0;
let mut remaining_mask = mask;
while remaining_mask != 0 {
if packed_data & remaining_mask & (!remaining_mask + 1) != 0 {
result |= 1u32 << bit_idx;
}
bit_idx += 1;
remaining_mask &= remaining_mask - 1;
}
result
})
.collect()
}
}
pub fn dispatch_bit_stream_process(&self, data: &[u64], operation: CompressionOperation) -> Vec<u64> {
if self.capabilities.has_bmi2 && self.config.enable_compression_optimizations {
match operation {
CompressionOperation::PopCount => {
data.iter().map(|&word| Bmi2RankOps::popcount_u64(word) as u64).collect()
},
CompressionOperation::LeadingZeros => {
data.iter().map(|&word| Bmi2RankOps::leading_zeros(word) as u64).collect()
},
CompressionOperation::TrailingZeros => {
data.iter().map(|&word| Bmi2RankOps::trailing_zeros(word) as u64).collect()
},
CompressionOperation::BitReverse => {
data.iter().map(|&word| Bmi2BitOps::deposit_bits(word.reverse_bits(), u64::MAX)).collect()
},
}
} else {
match operation {
CompressionOperation::PopCount => {
data.iter().map(|&word| word.count_ones() as u64).collect()
},
CompressionOperation::LeadingZeros => {
data.iter().map(|&word| word.leading_zeros() as u64).collect()
},
CompressionOperation::TrailingZeros => {
data.iter().map(|&word| word.trailing_zeros() as u64).collect()
},
CompressionOperation::BitReverse => {
data.iter().map(|&word| word.reverse_bits()).collect()
},
}
}
}
pub fn optimization_report(&self) -> CompressionOptimizationReport {
CompressionOptimizationReport {
has_bmi2: self.capabilities.has_bmi2,
entropy_acceleration: self.config.enable_entropy_acceleration,
variable_length_acceleration: self.config.enable_variable_length_decoding,
compression_optimization: self.config.enable_compression_optimizations,
estimated_speedups: self.get_estimated_speedups(),
}
}
fn get_estimated_speedups(&self) -> std::collections::HashMap<&'static str, f64> {
let mut speedups = std::collections::HashMap::new();
if self.capabilities.has_bmi2 {
speedups.insert("Variable-length decoding", 4.0);
speedups.insert("Huffman symbol extraction", 3.0);
speedups.insert("rANS decoding", 3.5);
speedups.insert("FSE decoding", 2.5);
speedups.insert("Bit stream processing", 6.0);
speedups.insert("Dictionary compression", 2.8);
speedups.insert("Delta compression", 1.8);
speedups.insert("Bit-packed arrays", 3.2);
}
speedups
}
}
impl Default for CompressionBmi2Dispatcher {
fn default() -> Self {
Self::new()
}
}
#[derive(Debug, Clone, Copy)]
pub enum CompressionOperation {
PopCount,
LeadingZeros,
TrailingZeros,
BitReverse,
}
#[derive(Debug, Clone)]
pub struct CompressionOptimizationReport {
pub has_bmi2: bool,
pub entropy_acceleration: bool,
pub variable_length_acceleration: bool,
pub compression_optimization: bool,
pub estimated_speedups: std::collections::HashMap<&'static str, f64>,
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_bit_ops_basic() {
let bit_ops = BitOps::new();
assert_eq!(bit_ops.popcount32(0), 0);
assert_eq!(bit_ops.popcount32(0xFF), 8);
assert_eq!(bit_ops.popcount32(0xFFFFFFFF), 32);
assert_eq!(bit_ops.popcount64(0), 0);
assert_eq!(bit_ops.popcount64(0xFF), 8);
assert_eq!(bit_ops.popcount64(0xFFFFFFFFFFFFFFFF), 64);
}
#[test]
fn test_trailing_zeros() {
let bit_ops = BitOps::new();
assert_eq!(bit_ops.trailing_zeros32(1), 0);
assert_eq!(bit_ops.trailing_zeros32(2), 1);
assert_eq!(bit_ops.trailing_zeros32(4), 2);
assert_eq!(bit_ops.trailing_zeros32(8), 3);
assert_eq!(bit_ops.trailing_zeros32(0), 32);
assert_eq!(bit_ops.trailing_zeros64(1), 0);
assert_eq!(bit_ops.trailing_zeros64(2), 1);
assert_eq!(bit_ops.trailing_zeros64(0), 64);
}
#[test]
fn test_zero_high_bits() {
let bit_ops = BitOps::new();
assert_eq!(bit_ops.zero_high_bits32(0xFF, 4), 0x0F);
assert_eq!(bit_ops.zero_high_bits32(0xFFFF, 8), 0xFF);
assert_eq!(bit_ops.zero_high_bits32(0xFFFF, 32), 0xFFFF);
assert_eq!(bit_ops.zero_high_bits64(0xFF, 4), 0x0F);
assert_eq!(bit_ops.zero_high_bits64(0xFFFFFFFFFFFFFFFF, 32), 0xFFFFFFFF);
}
#[test]
fn test_parallel_deposit_extract() {
let bit_ops = BitOps::new();
let source = 0b1010;
let mask = 0b1100;
let deposited = bit_ops.parallel_deposit32(source, mask);
let extracted = bit_ops.parallel_extract32(deposited, mask);
assert!(extracted <= source);
}
#[test]
fn test_select_bit() {
let bit_ops = BitOps::new();
let x = 0b10110000;
assert_eq!(bit_ops.select_bit32(x, 0), Some(4)); assert_eq!(bit_ops.select_bit32(x, 1), Some(5)); assert_eq!(bit_ops.select_bit32(x, 2), Some(7)); assert_eq!(bit_ops.select_bit32(x, 3), None); }
#[test]
fn test_vectorized_popcount() {
let bit_ops = BitOps::new();
let data = vec![0, 0xFF, 0xFFFF, 0xFFFFFFFF, 0xFFFFFFFFFFFFFFFF];
let expected = vec![0, 8, 16, 32, 64];
let result = bit_ops.vectorized_popcount(&data);
assert_eq!(result, expected);
}
#[test]
fn test_entropy_bit_ops() {
let entropy_ops = EntropyBitOps::new();
let stream = 0b1100101000000000u64; assert_eq!(entropy_ops.extract_bits(stream, 0, 4), 0);
assert_eq!(entropy_ops.extract_bits(stream, 4, 4), 0b1010);
let mut stream = 0u64;
entropy_ops.pack_bits(&mut stream, 0b1010, 4, 4).unwrap();
assert_ne!(stream, 0);
}
#[test]
fn test_bit_reversal() {
let entropy_ops = EntropyBitOps::new();
let x = 0b10000000000000000000000000000001u32;
let reversed = entropy_ops.reverse_bits32(x);
assert_eq!(reversed, 0b10000000000000000000000000000001u32);
let x = 0b11110000000000000000000000001111u32;
let reversed = entropy_ops.reverse_bits32(x);
assert_eq!(reversed, 0b11110000000000000000000000001111u32);
}
#[test]
fn test_config_detection() {
let config = BitOpsConfig::default();
println!("BMI2 enabled: {}", config.enable_bmi2);
println!("AVX2 enabled: {}", config.enable_avx2);
println!("POPCNT enabled: {}", config.enable_popcnt);
println!("Software fallback: {}", config.software_fallback);
println!("Compression optimizations: {}", config.enable_compression_optimizations);
println!("Entropy acceleration: {}", config.enable_entropy_acceleration);
println!("Variable-length decoding: {}", config.enable_variable_length_decoding);
assert!(config.software_fallback); assert!(config.enable_compression_optimizations); assert!(config.enable_entropy_acceleration); }
#[test]
fn test_bmi2_variable_length_encoding() {
let bit_ops = BitOps::new();
let value = 0xAB;
let encoded = bit_ops.encode_variable_length_bmi2(value, 8).unwrap();
assert_eq!(encoded, 0xAB);
let bit_stream = 0xABCDEF12u64;
let decoded = bit_ops.decode_variable_length_bmi2(bit_stream, 0, 8).unwrap();
assert_eq!(decoded, 0x12);
let decoded2 = bit_ops.decode_variable_length_bmi2(bit_stream, 8, 8).unwrap();
assert_eq!(decoded2, 0xEF);
}
#[test]
fn test_entropy_huffman_extraction() {
let entropy_ops = EntropyBitOps::new();
let packed_symbols = 0x123456789ABCDEFu64;
let symbol_masks = vec![0x000Fu64, 0x00F0u64, 0x0F00u64];
let symbols = entropy_ops.bit_ops().extract_huffman_symbols_bmi2(packed_symbols, &symbol_masks);
assert!(symbols.len() == 3);
println!("Huffman symbols: {:?}", symbols);
}
#[test]
fn test_entropy_bit_ops_enhanced() {
let entropy_ops = EntropyBitOps::new();
let packed_data = 0xABCDu64;
let extracted1 = entropy_ops.extract_bits(packed_data, 4, 8);
let extracted2 = entropy_ops.extract_bits(packed_data, 12, 4);
let mut stream = 0u64;
entropy_ops.pack_bits(&mut stream, 0xAB, 8, 8).unwrap();
entropy_ops.pack_bits(&mut stream, 0xCD, 16, 8).unwrap();
let reversed = entropy_ops.reverse_bits32(0x12345678);
assert_ne!(reversed, 0x12345678); }
#[test]
fn test_bit_operations() {
let entropy_ops = EntropyBitOps::new();
let mut stream = 0u64;
entropy_ops.pack_bits(&mut stream, 0xA, 8, 4).unwrap();
entropy_ops.pack_bits(&mut stream, 0xB, 12, 4).unwrap();
entropy_ops.pack_bits(&mut stream, 0xC, 16, 4).unwrap();
let val0 = entropy_ops.extract_bits(stream, 8, 4);
let val1 = entropy_ops.extract_bits(stream, 12, 4);
let val2 = entropy_ops.extract_bits(stream, 16, 4);
assert!(val0 <= 0xF);
assert!(val1 <= 0xF);
assert!(val2 <= 0xF);
}
#[test]
fn test_compression_dispatcher() {
let dispatcher = CompressionBmi2Dispatcher::new();
let bit_stream = 0xABCDEF12u64;
let extracted = dispatcher.dispatch_entropy_extract(bit_stream, 8, 8);
assert_eq!(extracted, 0xEF);
let symbol_masks = vec![0x0Fu64, 0xF0u64];
let symbols = dispatcher.dispatch_variable_length_decode(0x123u64, &symbol_masks);
assert_eq!(symbols.len(), 2);
let data = vec![0xAAAAAAAAAAAAAAAAu64, 0x5555555555555555u64];
let popcounts = dispatcher.dispatch_bit_stream_process(&data, CompressionOperation::PopCount);
assert_eq!(popcounts, vec![32, 32]);
let report = dispatcher.optimization_report();
println!("Compression optimization report: {:?}", report);
}
#[test]
fn test_bmi2_bit_operations() {
let bit_ops = BitOps::new();
let x = 0xABCDEF1234567890u64;
let reversed = bit_ops.bit_reverse_bmi2(x);
assert_eq!(reversed, x.reverse_bits());
let source = 0xFFFF0000FFFF0000u64;
let masks = vec![0x000000000000FFFFu64, 0x00000000FFFF0000u64, 0x0000FFFF00000000u64];
let extracted = bit_ops.parallel_bit_extract_bmi2(source, &masks);
assert_eq!(extracted.len(), 3);
let low_bits = 0xAAAAu32;
let high_bits = 0x5555u32;
let interleaved = bit_ops.bit_interleaving_bmi2(low_bits, high_bits);
assert_ne!(interleaved, 0);
}
}