use std::f64::consts::PI;
use ndarray::Array2;
use rand::Rng;
use rand_distr::{Distribution, Normal};
pub fn marchenko_pastur_density(lambda: f64, ratio: f64, sigma_sq: f64) -> f64 {
if ratio <= 0.0 || lambda <= 0.0 {
return 0.0;
}
let gamma = ratio.min(1.0 / ratio); let lambda_plus = sigma_sq * (1.0 + gamma.sqrt()).powi(2);
let lambda_minus = sigma_sq * (1.0 - gamma.sqrt()).powi(2);
if lambda < lambda_minus || lambda > lambda_plus {
return 0.0;
}
let sqrt_term = ((lambda_plus - lambda) * (lambda - lambda_minus)).sqrt();
sqrt_term / (2.0 * PI * sigma_sq * gamma * lambda)
}
pub fn marchenko_pastur_support(ratio: f64, sigma_sq: f64) -> (f64, f64) {
let gamma = ratio.min(1.0 / ratio);
let lambda_plus = sigma_sq * (1.0 + gamma.sqrt()).powi(2);
let lambda_minus = sigma_sq * (1.0 - gamma.sqrt()).powi(2);
(lambda_minus, lambda_plus)
}
pub fn wigner_semicircle_density(lambda: f64, sigma: f64) -> f64 {
let r = 2.0 * sigma;
if lambda.abs() > r {
return 0.0;
}
(2.0 / (PI * r * r)) * (r * r - lambda * lambda).sqrt()
}
pub fn sample_wishart_with<R: Rng>(rng: &mut R, n: usize, p: usize) -> Array2<f64> {
let normal = Normal::new(0.0, 1.0).expect("Normal(0, 1) should be valid");
let mut x = Array2::zeros((n, p));
for i in 0..n {
for j in 0..p {
x[[i, j]] = normal.sample(rng);
}
}
x.t().dot(&x)
}
pub fn sample_wishart(n: usize, p: usize) -> Array2<f64> {
sample_wishart_with(&mut rand::rng(), n, p)
}
pub fn sample_goe_with<R: Rng>(rng: &mut R, n: usize) -> Array2<f64> {
let normal = Normal::new(0.0, 1.0).expect("Normal(0, 1) should be valid");
let mut m = Array2::zeros((n, n));
for i in 0..n {
m[[i, i]] = normal.sample(rng) * 2.0_f64.sqrt();
}
for i in 0..n {
for j in (i + 1)..n {
let val = normal.sample(rng);
m[[i, j]] = val;
m[[j, i]] = val;
}
}
let scale = 1.0 / (n as f64).sqrt();
m *= scale;
m
}
pub fn sample_goe(n: usize) -> Array2<f64> {
sample_goe_with(&mut rand::rng(), n)
}
pub fn level_spacing_ratios(eigenvalues: &[f64]) -> Vec<f64> {
if eigenvalues.len() < 3 {
return vec![];
}
let n = eigenvalues.len();
let mut ratios = Vec::with_capacity(n - 2);
for i in 0..(n - 2) {
let s1 = eigenvalues[i + 1] - eigenvalues[i];
let s2 = eigenvalues[i + 2] - eigenvalues[i + 1];
if s1 > 0.0 && s2 > 0.0 {
ratios.push(s1.min(s2) / s1.max(s2));
}
}
ratios
}
pub fn mean_spacing_ratio(eigenvalues: &[f64]) -> f64 {
let ratios = level_spacing_ratios(eigenvalues);
if ratios.is_empty() {
0.0
} else {
ratios.iter().sum::<f64>() / (ratios.len() as f64)
}
}
pub fn empirical_spectral_density(eigenvalues: &[f64], bins: usize) -> (Vec<f64>, Vec<f64>) {
if eigenvalues.is_empty() || bins == 0 {
return (vec![], vec![]);
}
let min = eigenvalues.iter().cloned().fold(f64::INFINITY, f64::min);
let max = eigenvalues
.iter()
.cloned()
.fold(f64::NEG_INFINITY, f64::max);
if (max - min).abs() < 1e-10 {
return (vec![min], vec![1.0]);
}
let bin_width = (max - min) / bins as f64;
let mut counts = vec![0usize; bins];
for &ev in eigenvalues {
let idx = ((ev - min) / bin_width).floor() as usize;
let idx = idx.min(bins - 1);
counts[idx] += 1;
}
let n = eigenvalues.len() as f64;
let centers: Vec<f64> = (0..bins)
.map(|i| min + (i as f64 + 0.5) * bin_width)
.collect();
let densities: Vec<f64> = counts.iter().map(|&c| c as f64 / (n * bin_width)).collect();
(centers, densities)
}
pub fn stieltjes_transform(eigenvalues: &[f64], z: f64) -> f64 {
let n = eigenvalues.len() as f64;
eigenvalues.iter().map(|&ev| 1.0 / (ev - z)).sum::<f64>() / n
}
pub fn effective_dimension(eigenvalues: &[f64], n_samples: usize, n_features: usize) -> usize {
if eigenvalues.is_empty() || n_samples == 0 || n_features == 0 {
return 0;
}
let ratio = n_features as f64 / n_samples as f64;
let mut sorted: Vec<f64> = eigenvalues.to_vec();
sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
let median = sorted[sorted.len() / 2];
let sigma_sq = median.max(1e-12);
let (_, lambda_plus) = marchenko_pastur_support(ratio, sigma_sq);
eigenvalues.iter().filter(|&&ev| ev > lambda_plus).count()
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_marchenko_pastur_normalization() {
let ratio = 0.5;
let (lambda_min, lambda_max) = marchenko_pastur_support(ratio, 1.0);
let n_points = 1000;
let dx = (lambda_max - lambda_min) / n_points as f64;
let integral: f64 = (0..n_points)
.map(|i| {
let x = lambda_min + (i as f64 + 0.5) * dx;
marchenko_pastur_density(x, ratio, 1.0) * dx
})
.sum();
assert!(
(integral - 1.0).abs() < 0.05,
"MP density should integrate to ~1"
);
}
#[test]
fn test_wigner_semicircle_normalization() {
let sigma = 1.0;
let r = 2.0 * sigma;
let n_points = 1000;
let dx = 2.0 * r / n_points as f64;
let integral: f64 = (0..n_points)
.map(|i| {
let x = -r + (i as f64 + 0.5) * dx;
wigner_semicircle_density(x, sigma) * dx
})
.sum();
assert!(
(integral - 1.0).abs() < 0.05,
"Wigner density should integrate to ~1"
);
}
#[test]
fn test_wigner_at_zero() {
let density = wigner_semicircle_density(0.0, 1.0);
let expected = 1.0 / PI;
assert!(
(density - expected).abs() < 0.01,
"Wigner at zero: {} vs expected {}",
density,
expected
);
}
#[test]
fn test_wishart_shape() {
let wishart = sample_wishart(100, 50);
assert_eq!(wishart.shape(), &[50, 50]);
}
#[test]
fn test_wishart_seeded_deterministic() {
use rand::SeedableRng;
let mut rng1 = rand::rngs::SmallRng::seed_from_u64(42);
let mut rng2 = rand::rngs::SmallRng::seed_from_u64(42);
let w1 = sample_wishart_with(&mut rng1, 20, 10);
let w2 = sample_wishart_with(&mut rng2, 20, 10);
assert_eq!(w1, w2);
}
#[test]
fn test_goe_symmetric() {
let goe = sample_goe(10);
for i in 0..10 {
for j in 0..10 {
assert!(
(goe[[i, j]] - goe[[j, i]]).abs() < 1e-10,
"GOE should be symmetric"
);
}
}
}
#[test]
fn test_goe_seeded_deterministic() {
use rand::SeedableRng;
let mut rng1 = rand::rngs::SmallRng::seed_from_u64(99);
let mut rng2 = rand::rngs::SmallRng::seed_from_u64(99);
let g1 = sample_goe_with(&mut rng1, 15);
let g2 = sample_goe_with(&mut rng2, 15);
assert_eq!(g1, g2);
}
#[test]
fn test_effective_dimension_with_signal() {
let mut eigenvalues = vec![10.0, 8.0, 6.0, 4.0, 3.0];
eigenvalues.extend(vec![1.0; 95]);
let dim = effective_dimension(&eigenvalues, 200, 100);
assert!(dim >= 4 && dim <= 6, "expected 4-6 signal dims, got {dim}");
}
#[test]
fn test_effective_dimension_pure_noise() {
let eigenvalues = vec![1.0; 50];
let dim = effective_dimension(&eigenvalues, 200, 50);
assert_eq!(dim, 0, "pure noise should have 0 effective dims");
}
#[test]
fn test_effective_dimension_empty() {
assert_eq!(effective_dimension(&[], 100, 50), 0);
}
#[test]
fn test_spacing_ratio_bounds() {
let eigenvalues = vec![1.0, 2.0, 3.5, 4.0, 6.0];
let ratios = level_spacing_ratios(&eigenvalues);
for r in ratios {
assert!(
(0.0..=1.0).contains(&r),
"spacing ratio should be in [0, 1]"
);
}
}
#[test]
fn test_empirical_density() {
let eigenvalues: Vec<f64> = (0..100).map(|i| i as f64 / 100.0).collect();
let (centers, densities) = empirical_spectral_density(&eigenvalues, 10);
assert_eq!(centers.len(), 10);
assert_eq!(densities.len(), 10);
for d in &densities {
assert!(*d > 0.5 && *d < 1.5);
}
}
}