#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;
use crate::entropy_fast::{
distinct_byte_count, get_log2_table, histogram_8way, shannon_entropy_scalar,
};
/// Shannon entropy of `data` in bits per byte, using AVX2/FMA for large inputs.
///
/// Returns `0.0` when `data` is empty or when `histogram_8way` reports an
/// active length of zero.
///
/// Two strategies are used, selected by the active length reported by
/// `histogram_8way`:
///
/// * at most 255: every non-zero counter is looked up in the table from
///   `get_log2_table` and the result is `log2(n) - sum / n`
///   (NOTE(review): this presumes the table holds `c * log2(c)` — confirm
///   against `get_log2_table`);
/// * otherwise: the first 256 counters are processed four at a time and
///   `-p * log2(p)` is accumulated per lane, with `log2` evaluated by
///   `approx_log2_pd`. The result therefore carries that helper's
///   approximation error.
///
/// # Safety
///
/// The caller must ensure the running CPU supports the `avx2` and `fma`
/// target features.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2,fma")]
#[allow(unsafe_op_in_unsafe_fn)]
pub(crate) unsafe fn shannon_entropy_avx2(data: &[u8]) -> f64 {
    if data.is_empty() {
        return 0.0;
    }

    let (counts, active_len) = histogram_8way(data);
    if active_len == 0 {
        return 0.0;
    }
    let total = active_len as f64;

    // Small inputs: table lookups instead of computing logarithms.
    if active_len <= 255 {
        let table = get_log2_table();
        let table_sum = counts
            .iter()
            .copied()
            .filter(|&c| c > 0)
            .fold(0.0_f64, |acc, c| acc + table[c as usize]);
        return total.log2() - table_sum / total;
    }

    let zero_v = _mm256_setzero_pd();
    let total_v = _mm256_set1_pd(total);
    let mut acc_v = zero_v;

    for base in (0..256).step_by(4) {
        // Loads 128 bits and converts them as four signed 32-bit integers.
        // NOTE(review): assumes the counters are 32-bit and stay below 2^31 — confirm.
        let raw = _mm_loadu_si128(counts[base..].as_ptr() as *const __m128i);
        let counts_pd = _mm256_cvtepi32_pd(raw);

        // Lanes whose counter is strictly positive; empty groups are skipped.
        let nonzero = _mm256_cmp_pd::<_CMP_GT_OQ>(counts_pd, zero_v);
        if _mm256_movemask_pd(nonzero) != 0 {
            let prob = _mm256_div_pd(counts_pd, total_v);
            let weighted = _mm256_mul_pd(prob, approx_log2_pd(prob));
            // Zero out lanes with an empty counter before accumulating.
            acc_v = _mm256_sub_pd(acc_v, _mm256_and_pd(weighted, nonzero));
        }
    }

    // Horizontal sum of the four partial accumulators.
    let mut lanes = [0.0f64; 4];
    _mm256_storeu_pd(lanes.as_mut_ptr(), acc_v);
    lanes.iter().sum()
}
/// Lane-wise approximation of `log2(x)` for four `f64` values.
///
/// Each lane is first clamped to `[f64::MIN_POSITIVE, 1.0]`, so inputs above
/// `1.0` (and NaN lanes) yield `0.0` and non-positive inputs yield `-1022.0`.
///
/// The value is split into its binary exponent `e` and mantissa `m` in
/// `[1, 2)`. The mantissa is then re-centred into `(sqrt(2)/2, sqrt(2)]`
/// (halving `m` and bumping `e` when `m > sqrt(2)`) so that
/// `s = (m - 1) / (m + 1)` satisfies `|s| <= 0.1716`, and
///
/// ```text
/// log2(m) = (2 / ln 2) * (s + s^3/3 + s^5/5 + ... + s^11/11)
/// ```
///
/// is evaluated with Horner's scheme in `s^2`. The truncation error is below
/// `1e-10` in absolute terms, and exact powers of two produce exact results.
///
/// # Safety
///
/// The caller must ensure the running CPU supports the `avx2` and `fma`
/// target features.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2,fma")]
#[allow(unsafe_op_in_unsafe_fn)]
unsafe fn approx_log2_pd(x: __m256d) -> __m256d {
    // Series coefficients 2 / (k * ln 2) for k = 1, 3, ..., 11.
    const C1: f64 = 2.0 * std::f64::consts::LOG2_E;
    const C3: f64 = C1 / 3.0;
    const C5: f64 = C1 / 5.0;
    const C7: f64 = C1 / 7.0;
    const C9: f64 = C1 / 9.0;
    const C11: f64 = C1 / 11.0;

    let one = _mm256_set1_pd(1.0);
    let min_val = _mm256_set1_pd(f64::MIN_POSITIVE);
    // min-then-max order matters: a NaN lane becomes 1.0 after the min.
    let clamped_x = _mm256_max_pd(_mm256_min_pd(x, one), min_val);

    // Biased exponent: bits 52..62 of each 64-bit lane.
    let bits = _mm256_castpd_si256(clamped_x);
    let e = _mm256_and_si256(_mm256_srli_epi64(bits, 52), _mm256_set1_epi64x(0x7FF_i64));
    // Gather the low 32 bits of every 64-bit lane into the lower 128 bits so
    // the exponents can be converted with the i32 -> f64 instruction.
    let e_packed = _mm256_permutevar8x32_epi32(e, _mm256_setr_epi32(0, 2, 4, 6, 0, 0, 0, 0));
    let e_unbiased = _mm_sub_epi32(_mm256_castsi256_si128(e_packed), _mm_set1_epi32(1023));
    let e_f = _mm256_cvtepi32_pd(e_unbiased);

    // Mantissa with the exponent field forced to 1023, i.e. m in [1, 2).
    let m_bits = _mm256_or_si256(
        _mm256_and_si256(bits, _mm256_set1_epi64x(0x000F_FFFF_FFFF_FFFF_i64)),
        _mm256_set1_epi64x(0x3FF0_0000_0000_0000_i64),
    );
    let m = _mm256_castsi256_pd(m_bits);

    // Range reduction: where m > sqrt(2), use m / 2 and e + 1 instead. This
    // keeps |s| small so the short series below converges quickly.
    let upper = _mm256_cmp_pd::<_CMP_GT_OQ>(m, _mm256_set1_pd(std::f64::consts::SQRT_2));
    let m = _mm256_blendv_pd(m, _mm256_mul_pd(m, _mm256_set1_pd(0.5)), upper);
    let e_f = _mm256_add_pd(e_f, _mm256_and_pd(upper, one));

    let s = _mm256_div_pd(_mm256_sub_pd(m, one), _mm256_add_pd(m, one));
    let s2 = _mm256_mul_pd(s, s);

    let mut poly = _mm256_set1_pd(C11);
    poly = _mm256_fmadd_pd(poly, s2, _mm256_set1_pd(C9));
    poly = _mm256_fmadd_pd(poly, s2, _mm256_set1_pd(C7));
    poly = _mm256_fmadd_pd(poly, s2, _mm256_set1_pd(C5));
    poly = _mm256_fmadd_pd(poly, s2, _mm256_set1_pd(C3));
    poly = _mm256_fmadd_pd(poly, s2, _mm256_set1_pd(C1));
    let log2m = _mm256_mul_pd(poly, s);

    _mm256_add_pd(e_f, log2m)
}
/// Shannon entropy of `data` in bits per byte, for CPUs with only SSE2.
///
/// Returns `0.0` when `data` is empty or when `histogram_8way` reports an
/// active length of zero.
///
/// When the active length is at most 255, every non-zero counter is looked up
/// in the table from `get_log2_table` and the result is `log2(n) - sum / n`
/// (NOTE(review): this presumes the table holds `c * log2(c)` — confirm
/// against `get_log2_table`). Otherwise `-p * log2(p)` is accumulated over
/// the non-zero counters using the exact `f64::log2`.
///
/// # Safety
///
/// The caller must ensure the running CPU supports the `sse2` target feature.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse2")]
#[allow(unsafe_op_in_unsafe_fn)]
pub(crate) unsafe fn shannon_entropy_sse2(data: &[u8]) -> f64 {
    if data.is_empty() {
        return 0.0;
    }

    let (counts, active_len) = histogram_8way(data);
    if active_len == 0 {
        return 0.0;
    }
    let total = active_len as f64;

    // Small inputs: table lookups instead of computing logarithms.
    if active_len <= 255 {
        let table = get_log2_table();
        let table_sum = counts
            .iter()
            .copied()
            .filter(|&c| c > 0)
            .fold(0.0_f64, |acc, c| acc + table[c as usize]);
        return total.log2() - table_sum / total;
    }

    // General case: sum -p * log2(p) over the symbols that actually occur.
    counts
        .iter()
        .copied()
        .filter(|&c| c > 0)
        .fold(0.0_f64, |entropy, c| {
            let p = c as f64 / total;
            entropy - p * p.log2()
        })
}
/// Reports whether the Shannon entropy of `data` is at least `threshold`.
///
/// Empty input is delegated to `shannon_entropy_scalar`. For non-empty input
/// a cheap rejection test runs first: entropy can never exceed the base-2
/// logarithm of the number of distinct symbols, so when that bound is already
/// below `threshold` the full computation is skipped. Otherwise the result of
/// `crate::entropy_fast::shannon_entropy_simd` is compared to `threshold`.
///
/// # Safety
///
/// The caller must ensure the running CPU supports the `sse2` target feature.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse2")]
#[allow(unsafe_op_in_unsafe_fn)]
pub(crate) unsafe fn has_high_entropy_fast_x86(data: &[u8], threshold: f64) -> bool {
    if data.is_empty() {
        return shannon_entropy_scalar(data) >= threshold;
    }

    // Upper bound on the achievable entropy given the alphabet actually used.
    let entropy_ceiling = (distinct_byte_count(data) as f64).log2();
    if entropy_ceiling < threshold {
        return false;
    }

    let measured = crate::entropy_fast::shannon_entropy_simd(data);
    measured >= threshold
}