use super::entropy::{shannon_entropy, LogBase};
use super::{validate_distribution, InfoTheoryError, InfoTheoryResult};
use crate::error::NumRs2Error;
use scirs2_core::ndarray::{Array1, ArrayView1};
pub fn entropy_rate(sequence: &Array1<usize>, order: usize) -> Result<f64, NumRs2Error> {
if sequence.is_empty() {
return Err(NumRs2Error::InvalidInput("Sequence is empty".to_string()));
}
if order >= sequence.len() {
return Err(NumRs2Error::InvalidInput(format!(
"Order {} too large for sequence length {}",
order,
sequence.len()
)));
}
let max_symbol = sequence
.iter()
.max()
.ok_or_else(|| NumRs2Error::InvalidInput("Empty sequence".to_string()))?;
let alphabet_size = max_symbol + 1;
if order == 0 {
let mut counts = vec![0usize; alphabet_size];
for &symbol in sequence.iter() {
counts[symbol] += 1;
}
let total = sequence.len() as f64;
let probs: Vec<f64> = counts.iter().map(|&c| (c as f64) / total).collect();
let probs_array = Array1::from_vec(probs);
return shannon_entropy(&probs_array, LogBase::Bits);
}
let context_size = alphabet_size.pow(order as u32);
let ngram_size = alphabet_size.pow((order + 1) as u32);
let mut context_counts = vec![0usize; context_size];
let mut ngram_counts = vec![0usize; ngram_size];
for i in order..sequence.len() {
let mut context_idx = 0;
for j in 0..order {
context_idx = context_idx * alphabet_size + sequence[i - order + j];
}
context_counts[context_idx] += 1;
let ngram_idx = context_idx * alphabet_size + sequence[i];
ngram_counts[ngram_idx] += 1;
}
let total = (sequence.len() - order) as f64;
let context_probs: Vec<f64> = context_counts.iter().map(|&c| (c as f64) / total).collect();
let context_probs_array = Array1::from_vec(context_probs);
let h_context = shannon_entropy(&context_probs_array, LogBase::Bits)?;
let ngram_probs: Vec<f64> = ngram_counts.iter().map(|&c| (c as f64) / total).collect();
let ngram_probs_array = Array1::from_vec(ngram_probs);
let h_ngram = shannon_entropy(&ngram_probs_array, LogBase::Bits)?;
let rate = h_ngram - h_context;
Ok(rate.max(0.0))
}
pub fn binary_symmetric_channel_capacity(error_prob: f64) -> Result<f64, NumRs2Error> {
if !(0.0..=0.5).contains(&error_prob) {
return Err(NumRs2Error::ValueError(format!(
"Error probability must be in [0, 0.5], got {}",
error_prob
)));
}
if error_prob == 0.0 {
return Ok(1.0);
}
if (error_prob - 0.5).abs() < 1e-10 {
return Ok(0.0);
}
let probs = Array1::from_vec(vec![error_prob, 1.0 - error_prob]);
let h_p = shannon_entropy(&probs, LogBase::Bits)?;
Ok(1.0 - h_p)
}
pub fn binary_erasure_channel_capacity(erasure_prob: f64) -> Result<f64, NumRs2Error> {
if !(0.0..=1.0).contains(&erasure_prob) {
return Err(NumRs2Error::ValueError(format!(
"Erasure probability must be in [0, 1], got {}",
erasure_prob
)));
}
Ok(1.0 - erasure_prob)
}
pub fn rate_distortion_binary(source_prob: f64, distortion: f64) -> Result<f64, NumRs2Error> {
if !(0.0..=1.0).contains(&source_prob) {
return Err(NumRs2Error::ValueError(format!(
"Source probability must be in [0, 1], got {}",
source_prob
)));
}
if distortion < 0.0 {
return Err(NumRs2Error::ValueError(format!(
"Distortion must be non-negative, got {}",
distortion
)));
}
let p = source_prob;
let d_max = p.min(1.0 - p);
if distortion >= d_max {
return Ok(0.0);
}
let source_probs = Array1::from_vec(vec![p, 1.0 - p]);
let h_p = shannon_entropy(&source_probs, LogBase::Bits)?;
let distortion_probs = Array1::from_vec(vec![distortion, 1.0 - distortion]);
let h_d = shannon_entropy(&distortion_probs, LogBase::Bits)?;
Ok(h_p - h_d)
}
pub fn aic(log_likelihood: f64, num_parameters: usize) -> f64 {
-2.0 * log_likelihood + 2.0 * (num_parameters as f64)
}
pub fn aicc(
log_likelihood: f64,
num_parameters: usize,
sample_size: usize,
) -> Result<f64, NumRs2Error> {
if sample_size <= num_parameters + 1 {
return Err(NumRs2Error::ValueError(format!(
"Sample size {} must be greater than k+1 = {}",
sample_size,
num_parameters + 1
)));
}
let aic_val = aic(log_likelihood, num_parameters);
let k = num_parameters as f64;
let n = sample_size as f64;
let correction = (2.0 * k * (k + 1.0)) / (n - k - 1.0);
Ok(aic_val + correction)
}
pub fn bic(log_likelihood: f64, num_parameters: usize, sample_size: usize) -> f64 {
let k = num_parameters as f64;
let n = sample_size as f64;
-2.0 * log_likelihood + k * n.ln()
}
pub fn mdl(log_likelihood: f64, num_parameters: usize, sample_size: usize) -> f64 {
let k = num_parameters as f64;
let n = sample_size as f64;
-log_likelihood + (k / 2.0) * n.ln()
}
#[cfg(test)]
mod tests {
use super::*;
const EPSILON: f64 = 1e-10;
#[test]
fn test_entropy_rate_iid() {
let sequence = Array1::from_vec(vec![0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]);
let rate = entropy_rate(&sequence, 0).expect("entropy rate failed");
assert!((rate - 1.0).abs() < 0.1);
}
#[test]
fn test_entropy_rate_deterministic() {
let sequence = Array1::from_vec(vec![0, 0, 0, 0, 0, 0, 0, 0]);
let rate = entropy_rate(&sequence, 0).expect("entropy rate failed");
assert!(rate.abs() < EPSILON);
}
#[test]
fn test_entropy_rate_markov() {
let sequence = Array1::from_vec(vec![0, 1, 0, 1, 0, 1, 0, 1, 0, 1]);
let rate = entropy_rate(&sequence, 1).expect("entropy rate failed");
assert!(rate.abs() < 0.1);
}
#[test]
fn test_bsc_capacity() {
let c = binary_symmetric_channel_capacity(0.0).expect("bsc capacity failed");
assert!((c - 1.0).abs() < EPSILON);
let c2 = binary_symmetric_channel_capacity(0.5).expect("bsc capacity failed");
assert!(c2.abs() < EPSILON);
let c3 = binary_symmetric_channel_capacity(0.1).expect("bsc capacity failed");
assert!(c3 > 0.5 && c3 < 1.0);
}
#[test]
fn test_bec_capacity() {
let c = binary_erasure_channel_capacity(0.0).expect("bec capacity failed");
assert!((c - 1.0).abs() < EPSILON);
let c2 = binary_erasure_channel_capacity(0.5).expect("bec capacity failed");
assert!((c2 - 0.5).abs() < EPSILON);
let c3 = binary_erasure_channel_capacity(1.0).expect("bec capacity failed");
assert!(c3.abs() < EPSILON);
}
#[test]
fn test_rate_distortion_binary() {
let r = rate_distortion_binary(0.5, 0.0).expect("rate distortion failed");
assert!((r - 1.0).abs() < EPSILON);
let r2 = rate_distortion_binary(0.5, 0.5).expect("rate distortion failed");
assert!(r2.abs() < EPSILON);
let r3 = rate_distortion_binary(0.2, 0.0).expect("rate distortion failed");
let expected = -0.2 * 0.2_f64.log2() - 0.8 * 0.8_f64.log2();
assert!((r3 - expected).abs() < 0.01);
}
#[test]
fn test_aic() {
let log_likelihood = -100.0;
let k = 5;
let aic_val = aic(log_likelihood, k);
assert!((aic_val - 210.0).abs() < EPSILON); }
#[test]
fn test_aicc() {
let log_likelihood = -100.0;
let k = 5;
let n = 50;
let aicc_val = aicc(log_likelihood, k, n).expect("aicc failed");
let aic_val = aic(log_likelihood, k);
let correction = (2.0 * 5.0 * 6.0) / (50.0 - 5.0 - 1.0);
let expected = aic_val + correction;
assert!((aicc_val - expected).abs() < EPSILON);
assert!(aicc_val > aic_val); }
#[test]
fn test_bic() {
let log_likelihood = -100.0;
let k = 5;
let n = 100;
let bic_val = bic(log_likelihood, k, n);
let expected = -2.0 * log_likelihood + (k as f64) * (n as f64).ln();
assert!((bic_val - expected).abs() < EPSILON);
}
#[test]
fn test_mdl() {
let log_likelihood = -100.0;
let k = 5;
let n = 100;
let mdl_val = mdl(log_likelihood, k, n);
let expected = -log_likelihood + (k as f64 / 2.0) * (n as f64).ln();
assert!((mdl_val - expected).abs() < EPSILON);
}
#[test]
fn test_model_selection_comparison() {
let log_likelihood = -100.0;
let k = 5;
let n = 100;
let aic_val = aic(log_likelihood, k);
let bic_val = bic(log_likelihood, k, n);
assert!(bic_val > aic_val);
}
#[test]
fn test_coding_errors() {
assert!(binary_symmetric_channel_capacity(-0.1).is_err());
assert!(binary_symmetric_channel_capacity(0.6).is_err());
assert!(binary_erasure_channel_capacity(-0.1).is_err());
assert!(binary_erasure_channel_capacity(1.1).is_err());
assert!(rate_distortion_binary(0.5, -0.1).is_err());
assert!(aicc(-100.0, 5, 5).is_err());
}
}