use infotheory::axioms;
use infotheory::datagen;
#[cfg(feature = "backend-zpaq")]
use infotheory::{CompressionBackend, NcdVariant, ncd_bytes_backend};
use infotheory::{
RateBackend, entropy_rate_backend, marginal_entropy_bytes, mutual_information_bytes,
};
const TOLERANCE_ENTROPY: f64 = 0.1;
const TOLERANCE_MI: f64 = 0.2;
#[cfg(feature = "backend-zpaq")]
const TOLERANCE_NCD: f64 = 0.1;
#[test]
fn entropy_vs_theoretical_bernoulli() {
for p in [0.1, 0.25, 0.5, 0.75, 0.9] {
let n = 20_000;
let data = datagen::bernoulli(n, p, 42);
let estimated = marginal_entropy_bytes(&data);
let theoretical = datagen::bernoulli_entropy(p);
println!(
"p={:.2}: Est={:.4} bits/byte, Theory={:.4} bits/byte",
p, estimated, theoretical
);
assert!(
(estimated - theoretical).abs() < TOLERANCE_ENTROPY,
"p={p}: est={estimated}, theory={theoretical}, diff={}",
(estimated - theoretical).abs()
);
assert!(axioms::verify_entropy_bounds(marginal_entropy_bytes, &data));
}
}
#[test]
fn mi_independent_is_zero() {
let n = 10_000;
let (x, y) = datagen::independent_pair(n, 12345, 67890);
let x: Vec<u8> = x.iter().map(|b| b & 0x0F).collect();
let y: Vec<u8> = y.iter().map(|b| b & 0x0F).collect();
let mi = mutual_information_bytes(&x, &y, 0);
println!("Independent MI (16-sym): {:.4}", mi);
assert!(
mi.abs() < TOLERANCE_MI,
"Independent MI should be ~0, got {mi}"
);
assert!(axioms::verify_mi_nonnegative(
|a, b| mutual_information_bytes(a, b, 0),
&x,
&y
));
}
#[test]
fn rosa_matches_theoretical_markov_entropy() {
let (p00, p11) = (0.8, 0.8);
let n = 20_000; let data = datagen::markov_1_binary(n, p00, p11, 42);
let theoretical = datagen::markov_1_binary_entropy_rate(p00, p11);
let backend = RateBackend::RosaPlus;
let estimated = entropy_rate_backend(&data, 20, &backend);
println!(
"ROSA Markov: Est={:.4}, Theory={:.4}",
estimated, theoretical
);
assert!(
(estimated - theoretical).abs() < 0.5,
"ROSA entropy rate: est={estimated}, theory={theoretical}"
);
}
#[test]
fn mi_deterministic_equals_entropy() {
let n = 10_000;
let (x, y) = datagen::deterministic_func(n, 42, |b| b.wrapping_add(1));
let mi = mutual_information_bytes(&x, &y, 0);
let h_y = marginal_entropy_bytes(&y);
println!("Deterministic: MI={:.4}, H(Y)={:.4}", mi, h_y);
assert!(
(mi - h_y).abs() < TOLERANCE_MI,
"Deterministic: MI should equal H(Y). MI={mi}, H(Y)={h_y}"
);
}
#[test]
fn mi_identical_equals_entropy() {
let n = 10_000;
let (x, y) = datagen::identical_pair(n, 42);
let mi = mutual_information_bytes(&x, &y, 0);
let h_x = marginal_entropy_bytes(&x);
println!("Identical: MI={:.4}, H(X)={:.4}", mi, h_x);
assert!(
(mi - h_x).abs() < TOLERANCE_MI,
"Identical: MI should equal H(X). MI={mi}, H(X)={h_x}"
);
}
#[test]
#[cfg(feature = "backend-zpaq")]
fn ncd_identity_is_zero() {
let n = 2_000; let (x, y) = datagen::identical_pair(n, 42);
let backend = CompressionBackend::Zpaq {
method: "1".to_string(),
};
let ncd = ncd_bytes_backend(&x, &y, &backend, NcdVariant::Vitanyi);
println!("NCD(X,X) = {:.4}", ncd);
assert!(
ncd.abs() < TOLERANCE_NCD,
"NCD(x,x) should be ~0, got {ncd}"
);
}
#[test]
#[cfg(feature = "backend-zpaq")]
fn ncd_independent_is_near_one() {
let n = 2_000;
let (x, y) = datagen::independent_pair(n, 12345, 67890);
let backend = CompressionBackend::Zpaq {
method: "1".to_string(),
};
let ncd = ncd_bytes_backend(&x, &y, &backend, NcdVariant::Vitanyi);
println!("NCD(X,Y) independent = {:.4}", ncd);
assert!(ncd > 0.8, "NCD(random, random) should be > 0.8, got {ncd}");
}
#[test]
#[cfg(feature = "backend-zpaq")]
fn ncd_triangle_inequality() {
let n = 1_000;
let x = datagen::uniform_random(n, 111);
let y = datagen::uniform_random(n, 222);
let z = datagen::uniform_random(n, 333);
let backend = CompressionBackend::Zpaq {
method: "1".to_string(),
};
let metric = |a: &[u8], b: &[u8]| ncd_bytes_backend(a, b, &backend, NcdVariant::Vitanyi);
assert!(
axioms::verify_triangle_inequality(metric, &x, &y, &z, 0.1),
"Triangle inequality failed for random triplet"
);
}
#[test]
fn ctw_matches_theoretical_markov_entropy() {
let (p00, p11) = (0.9, 0.9);
let n = 10_000;
let data = datagen::markov_1_binary(n, p00, p11, 42);
let theoretical = datagen::markov_1_binary_entropy_rate(p00, p11);
let backend = RateBackend::Ctw { depth: 8 };
let estimated = entropy_rate_backend(&data, -1, &backend);
println!(
"CTW Markov: Est={:.4}, Theory={:.4}",
estimated, theoretical
);
assert!(
(estimated - theoretical).abs() < 0.2,
"CTW entropy rate: est={estimated}, theory={theoretical}"
);
}