use numrs2::array::Array;
use numrs2::random::{self, set_seed};
const SAMPLE_SIZE: usize = 5000;
const _CONFIDENCE_LEVEL: f64 = 0.05;
fn percentile(sorted_data: &[f64], p: f64) -> f64 {
let idx = (sorted_data.len() as f64 * p).floor() as usize;
let idx = std::cmp::min(idx, sorted_data.len() - 1);
sorted_data[idx]
}
fn ks_statistic<F>(data: &[f64], cdf: F) -> f64
where
F: Fn(f64) -> f64,
{
let mut data = data.to_vec();
data.sort_by(|a, b| a.partial_cmp(b).unwrap());
let n = data.len() as f64;
let mut max_diff = 0.0;
for (i, &x) in data.iter().enumerate() {
let ecdf = (i + 1) as f64 / n;
let theo_cdf = cdf(x);
let diff = (ecdf - theo_cdf).abs();
if diff > max_diff {
max_diff = diff;
}
}
max_diff
}
fn test_standard_normal_conformance(samples: &Array<f64>) -> bool {
let data = samples.to_vec();
let normal_cdf = |x: f64| -> f64 {
if x < -10.0 {
return 0.0;
}
if x > 10.0 {
return 1.0;
}
let erf_approx = |z: f64| -> f64 {
let a1 = 0.254829592;
let a2 = -0.284496736;
let a3 = 1.421413741;
let a4 = -1.453152027;
let a5 = 1.061405429;
let p = 0.3275911;
let sign = if z < 0.0 { -1.0 } else { 1.0 };
let z = z.abs();
let t = 1.0 / (1.0 + p * z);
let y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * (-z * z).exp();
sign * y
};
0.5 * (1.0 + erf_approx(x / std::f64::consts::SQRT_2))
};
let ks = ks_statistic(&data, normal_cdf);
let critical_value = 1.63 / (SAMPLE_SIZE as f64).sqrt();
ks <= critical_value
}
#[test]
fn test_normal_to_chi_square_relationship() {
let k = 4;
set_seed(12345);
let mut chi_square_samples = Vec::with_capacity(SAMPLE_SIZE);
for _ in 0..SAMPLE_SIZE {
let normal_samples = random::standard_normal::<f64>(&[k]).unwrap();
let sum_of_squares: f64 = normal_samples.to_vec().iter().map(|x| x * x).sum();
chi_square_samples.push(sum_of_squares);
}
let samples = Array::from_vec(chi_square_samples);
let data = samples.to_vec();
let mean = data.iter().sum::<f64>() / data.len() as f64;
let variance = data.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / data.len() as f64;
let k_f64 = k as f64;
assert!(
(mean - k_f64).abs() <= 0.3 * k_f64,
"Expected mean close to {}, got {}",
k_f64,
mean
);
assert!(
(variance - 2.0 * k_f64).abs() <= 0.5 * k_f64,
"Expected variance close to {}, got {}",
2.0 * k_f64,
variance
);
}
#[test]
fn test_gamma_to_exponential_relationship() {
let rate = 0.5; let scale = 1.0 / rate;
let large_n: usize = 50_000;
set_seed(12345);
let exp_samples = random::exponential(scale, &[large_n]).unwrap_or_else(|e| {
panic!("Failed to generate exponential samples: {e}");
});
let gamma_samples = random::gamma(1.0, scale, &[large_n]).unwrap_or_else(|e| {
panic!("Failed to generate gamma samples: {e}");
});
let exp_data = exp_samples.to_vec();
let gamma_data = gamma_samples.to_vec();
let mut sorted_exp = exp_data.clone();
sorted_exp.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
let mut sorted_gamma = gamma_data.clone();
sorted_gamma.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
for p in [0.1, 0.25, 0.5, 0.75, 0.9].iter() {
let exp_percentile = percentile(&sorted_exp, *p);
let gamma_percentile = percentile(&sorted_gamma, *p);
assert!(
(exp_percentile - gamma_percentile).abs() <= 0.3 * exp_percentile,
"{}th percentile differs too much: exp={}, gamma={}",
p * 100.0,
exp_percentile,
gamma_percentile
);
}
}
#[test]
fn test_binomial_to_normal_approximation() {
let n = 100u64;
let p = 0.4;
set_seed(12345);
let binomial_samples = random::binomial::<u64>(n, p, &[SAMPLE_SIZE]).unwrap();
let binomial_f64 = Array::from_vec(
binomial_samples
.to_vec()
.iter()
.map(|&x| x as f64)
.collect(),
);
let mean = n as f64 * p;
let std_dev = (n as f64 * p * (1.0 - p)).sqrt();
set_seed(54321); let normal_samples = random::normal(mean, std_dev, &[SAMPLE_SIZE]).unwrap();
let binomial_data = binomial_f64.to_vec();
let normal_data = normal_samples.to_vec();
let mut sorted_binomial = binomial_data.clone();
sorted_binomial.sort_by(|a, b| a.partial_cmp(b).unwrap());
let mut sorted_normal = normal_data.clone();
sorted_normal.sort_by(|a, b| a.partial_cmp(b).unwrap());
for p in [0.1, 0.25, 0.5, 0.75, 0.9].iter() {
let binomial_percentile = percentile(&sorted_binomial, *p);
let normal_percentile = percentile(&sorted_normal, *p);
assert!(
(binomial_percentile - normal_percentile).abs() <= 1.5,
"{}th percentile differs too much: binomial={}, normal={}",
p * 100.0,
binomial_percentile,
normal_percentile
);
}
}
#[test]
fn test_studentt_to_normal_relation() {
let high_df = 100.0;
set_seed(12345);
let t_samples = random::student_t(high_df, &[SAMPLE_SIZE]).unwrap();
set_seed(54321); let normal_samples = random::standard_normal::<f64>(&[SAMPLE_SIZE]).unwrap();
let t_data = t_samples.to_vec();
let mut sorted_t = t_data.clone();
sorted_t.sort_by(|a, b| a.partial_cmp(b).unwrap());
let normal_data = normal_samples.to_vec();
let mut sorted_normal = normal_data.clone();
sorted_normal.sort_by(|a, b| a.partial_cmp(b).unwrap());
for p in [0.1, 0.25, 0.5, 0.75, 0.9].iter() {
let t_percentile = percentile(&sorted_t, *p);
let normal_percentile = percentile(&sorted_normal, *p);
assert!(
(t_percentile - normal_percentile).abs() <= 0.2,
"{}th percentile differs too much: t={}, normal={}",
p * 100.0,
t_percentile,
normal_percentile
);
}
}
#[test]
fn test_uniform_sum_to_approximate_normal() {
let num_uniforms = 12;
set_seed(12345);
let mut sum_samples = Vec::with_capacity(SAMPLE_SIZE);
for _ in 0..SAMPLE_SIZE {
let uniform_samples = random::uniform(0.0, 1.0, &[num_uniforms]).unwrap();
let sum: f64 = uniform_samples.to_vec().iter().sum();
sum_samples.push(sum);
}
let samples = Array::from_vec(sum_samples);
let expected_mean = num_uniforms as f64 / 2.0;
let expected_variance = num_uniforms as f64 / 12.0;
let data = samples.to_vec();
let mean = data.iter().sum::<f64>() / data.len() as f64;
let variance = data.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / data.len() as f64;
assert!(
(mean - expected_mean).abs() <= 0.15,
"Expected mean close to {}, got {}",
expected_mean,
mean
);
assert!(
(variance - expected_variance).abs() <= 0.15,
"Expected variance close to {}, got {}",
expected_variance,
variance
);
let standardized = samples
.to_vec()
.iter()
.map(|x| (x - expected_mean) / expected_variance.sqrt())
.collect::<Vec<f64>>();
let standardized_array = Array::from_vec(standardized);
assert!(
test_standard_normal_conformance(&standardized_array),
"Sum of uniform variables should approximate normal distribution"
);
}
#[test]
fn test_random_distribution_edge_cases() {
set_seed(12345);
let beta_samples = random::beta(1.0, 1.0, &[SAMPLE_SIZE]).unwrap();
let uniform_samples = random::uniform(0.0, 1.0, &[SAMPLE_SIZE]).unwrap();
let beta_data = beta_samples.to_vec();
let uniform_data = uniform_samples.to_vec();
let mut sorted_beta = beta_data.clone();
sorted_beta.sort_by(|a, b| a.partial_cmp(b).unwrap());
let mut sorted_uniform = uniform_data.clone();
sorted_uniform.sort_by(|a, b| a.partial_cmp(b).unwrap());
for p in [0.1, 0.25, 0.5, 0.75, 0.9].iter() {
let beta_percentile = percentile(&sorted_beta, *p);
let uniform_percentile = percentile(&sorted_uniform, *p);
assert!(
(beta_percentile - uniform_percentile).abs() <= 0.1,
"{}th percentile differs too much: beta(1,1)={}, uniform(0,1)={}",
p * 100.0,
beta_percentile,
uniform_percentile
);
}
set_seed(12345);
let scale = 2.0;
let gamma_samples = random::gamma(1.0, scale, &[SAMPLE_SIZE]).unwrap();
let exp_samples = random::exponential(scale, &[SAMPLE_SIZE]).unwrap();
let gamma_mean = gamma_samples.to_vec().iter().sum::<f64>() / SAMPLE_SIZE as f64;
let exp_mean = exp_samples.to_vec().iter().sum::<f64>() / SAMPLE_SIZE as f64;
assert!(
(gamma_mean - exp_mean).abs() <= 0.3 * scale,
"Gamma(1,s) and Exponential(s) should have similar means"
);
}
#[test]
fn test_distribution_independence() {
let n = 1000;
set_seed(12345);
let normal_samples = random::normal(0.0, 1.0, &[n]).unwrap();
let uniform_samples = random::uniform(0.0, 1.0, &[n]).unwrap();
let gamma_samples = random::gamma(2.0, 1.0, &[n]).unwrap();
let exp_samples = random::exponential(1.0, &[n]).unwrap();
fn correlation(x: &[f64], y: &[f64]) -> f64 {
let n = x.len() as f64;
let x_mean = x.iter().sum::<f64>() / n;
let y_mean = y.iter().sum::<f64>() / n;
let numerator = x
.iter()
.zip(y.iter())
.map(|(&xi, &yi)| (xi - x_mean) * (yi - y_mean))
.sum::<f64>();
let x_variance = x.iter().map(|&xi| (xi - x_mean).powi(2)).sum::<f64>();
let y_variance = y.iter().map(|&yi| (yi - y_mean).powi(2)).sum::<f64>();
numerator / (x_variance.sqrt() * y_variance.sqrt())
}
let corr_normal_uniform = correlation(&normal_samples.to_vec(), &uniform_samples.to_vec());
let corr_normal_gamma = correlation(&normal_samples.to_vec(), &gamma_samples.to_vec());
let corr_uniform_exp = correlation(&uniform_samples.to_vec(), &exp_samples.to_vec());
assert!(
corr_normal_uniform.abs() <= 0.15,
"Normal and uniform distributions should be uncorrelated, got correlation: {}",
corr_normal_uniform
);
assert!(
corr_normal_gamma.abs() <= 0.15,
"Normal and gamma distributions should be uncorrelated, got correlation: {}",
corr_normal_gamma
);
assert!(
corr_uniform_exp.abs() <= 0.15,
"Uniform and exponential distributions should be uncorrelated, got correlation: {}",
corr_uniform_exp
);
}