use crate::histogram::Histogram32;
pub fn shannon_entropy_of_histogram32(counter: &[u32; 256], total: u64) -> f64 {
let total = total as f64;
if counter.iter().all(|&x| x > 0) {
shannon_entropy_of_histogram32_fast(counter, total)
} else {
shannon_entropy_of_histogram32_slow(counter, total)
}
}
#[inline(always)]
fn shannon_entropy_of_histogram32_fast(counter: &[u32; 256], total: f64) -> f64 {
let mut entropy0 = 0.0;
let mut entropy1 = 0.0;
let mut entropy2 = 0.0;
let mut entropy3 = 0.0;
for chunk in counter.chunks(4) {
let p0 = chunk[0] as f64 / total;
let p1 = chunk[1] as f64 / total;
let p2 = chunk[2] as f64 / total;
let p3 = chunk[3] as f64 / total;
entropy0 -= p0 * p0.log2();
entropy1 -= p1 * p1.log2();
entropy2 -= p2 * p2.log2();
entropy3 -= p3 * p3.log2();
}
entropy0 + entropy1 + entropy2 + entropy3
}
#[inline(always)]
fn shannon_entropy_of_histogram32_slow(counter: &[u32; 256], total: f64) -> f64 {
let mut entropy = 0.0;
for count in counter {
if *count == 0 {
continue;
}
let probability = *count as f64 / total;
let entropy_value = probability * probability.log2();
entropy -= entropy_value;
}
entropy
}
pub fn code_length_of_histogram32_no_size(histogram: &Histogram32) -> f64 {
let total: u64 = histogram.counter.iter().map(|&x| x as u64).sum();
code_length_of_histogram32(histogram, total)
}
pub fn code_length_of_histogram32(histogram: &Histogram32, total: u64) -> f64 {
shannon_entropy_of_histogram32(&histogram.counter, total)
}
#[cfg(test)]
mod tests {
use std::vec::Vec;
use super::*;
use crate::histogram::Histogram32;
#[test]
fn with_uniform_distribution() {
let hist = Histogram32::from_bytes(&[0, 1, 2, 3]);
let total = 4;
assert!((code_length_of_histogram32(&hist, total) - 2.0).abs() < 1e-10);
}
#[test]
fn with_single_value() {
let hist = Histogram32::from_bytes(&[1, 1, 1, 1]);
let total = 4;
assert!(code_length_of_histogram32(&hist, total).abs() < 1e-10);
}
#[test]
fn with_binary_distribution() {
let hist = Histogram32::from_bytes(&[0, 0, 1, 1]);
let total = 4;
assert!((code_length_of_histogram32(&hist, total) - 1.0).abs() < 1e-10);
}
#[test]
fn with_skewed_distribution() {
let hist = Histogram32::from_bytes(&[0, 0, 0, 1]);
let total = 4;
let expected = 0.811278124459;
assert!((code_length_of_histogram32(&hist, total) - expected).abs() < 1e-10);
}
#[test]
fn with_empty_histogram() {
let hist = Histogram32::from_bytes(&[]);
assert!((code_length_of_histogram32(&hist, 0) - 0.0).abs() < 1e-10);
}
#[test]
fn code_length_no_size_equals_with_size() {
let hist = Histogram32::from_bytes(&[0, 0, 0, 1]);
let total = 4;
assert!(
(code_length_of_histogram32_no_size(&hist) - code_length_of_histogram32(&hist, total))
.abs()
< 1e-10
);
}
#[test]
fn fast_path_matches_slow_path() {
let data: Vec<u8> = (0..10_000_u32)
.map(|x| (x * 33) as u8) .collect();
let total = data.len() as u64;
let hist = Histogram32::from_bytes(&data);
let fast = shannon_entropy_of_histogram32_fast(&hist.counter, total as f64);
let slow = shannon_entropy_of_histogram32_slow(&hist.counter, total as f64);
assert!(
(fast - slow).abs() < 1e-10,
"Non-zero case mismatch: fast={fast} slow={slow}"
);
}
}