use scirs2_core::ndarray::{Array1, Array2, ArrayView1, ArrayView2, Axis};
use scirs2_core::numeric::{Float, One, Zero};
use std::f64::consts::PI;
use crate::basic::{det, inv};
use crate::eigen::eigh;
use crate::error::{LinalgError, LinalgResult};
use crate::stats::covariance::covariancematrix;
#[derive(Debug, Clone)]
pub struct TestResult<F: Float> {
pub statistic: F,
pub p_value: F,
pub critical_value: Option<F>,
pub reject_null: bool,
pub degrees_of_freedom: Option<usize>,
}
impl<F: Float> TestResult<F> {
pub fn new(
statistic: F,
p_value: F,
critical_value: Option<F>,
significance_level: F,
degrees_of_freedom: Option<usize>,
) -> Self {
let reject_null = p_value < significance_level;
Self {
statistic,
p_value,
critical_value,
reject_null,
degrees_of_freedom,
}
}
}
#[allow(dead_code)]
pub fn box_m_test<F>(
_groups: &[ArrayView2<F>],
significance_level: F,
) -> LinalgResult<TestResult<F>>
where
F: Float
+ Zero
+ One
+ Copy
+ std::fmt::Debug
+ scirs2_core::ndarray::ScalarOperand
+ scirs2_core::numeric::FromPrimitive
+ scirs2_core::numeric::NumAssign
+ std::iter::Sum
+ Send
+ Sync
+ 'static,
{
if _groups.len() < 2 {
return Err(LinalgError::InvalidInputError(
"Need at least 2 _groups for Box's M test".to_string(),
));
}
let k = _groups.len();
let p = _groups[0].ncols();
for (i, group) in _groups.iter().enumerate() {
if group.ncols() != p {
return Err(LinalgError::ShapeError(format!(
"All _groups must have the same number of variables. Group 0 has {}, group {} has {}",
p, i, group.ncols()
)));
}
}
let mut samplesizes = Vec::with_capacity(k);
let mut group_covs = Vec::with_capacity(k);
let mut log_dets = Vec::with_capacity(k);
for group in _groups {
let n_i = group.nrows();
if n_i <= p {
return Err(LinalgError::InvalidInputError(format!(
"Each group must have more samples than variables: {n_i} <= {p}"
)));
}
samplesizes.push(n_i);
let mut cov_i = covariancematrix(group, Some(1))?;
let reg_factor = F::from(1e-10).expect("Failed to convert constant to float");
for j in 0..cov_i.nrows() {
cov_i[[j, j]] += reg_factor;
}
let det_i = det(&cov_i.view(), None)?;
if det_i <= F::zero() {
return Err(LinalgError::InvalidInputError(format!(
"Covariance matrix for group {} has non-positive determinant: {:?}. Consider increasing sample size or regularization.",
_groups.iter().position(|g| std::ptr::eq(g.as_ptr(), group.as_ptr())).unwrap_or(0),
det_i
)));
}
let log_det_i = det_i.ln();
group_covs.push(cov_i);
log_dets.push(log_det_i);
}
let total_dof: usize = samplesizes.iter().map(|&n| n - 1).sum();
let mut pooled_cov = Array2::zeros((p, p));
for (i, cov_i) in group_covs.iter().enumerate() {
let weight = F::from(samplesizes[i] - 1).expect("Failed to convert to float")
/ F::from(total_dof).expect("Failed to convert to float");
pooled_cov += &(cov_i * weight);
}
let reg_factor = F::from(1e-10).expect("Failed to convert constant to float");
for i in 0..pooled_cov.nrows() {
pooled_cov[[i, i]] += reg_factor;
}
let det_pooled = det::<F>(&pooled_cov.view(), None)?;
if det_pooled <= F::zero() {
return Err(LinalgError::InvalidInputError(format!(
"Pooled covariance matrix has non-positive determinant: {det_pooled:?}. Consider increasing sample sizes or regularization."
)));
}
let log_det_pooled = det_pooled.ln();
let mut m_statistic = F::from(total_dof).expect("Failed to convert to float") * log_det_pooled;
for (i, &log_det_i) in log_dets.iter().enumerate() {
m_statistic -= F::from(samplesizes[i] - 1).expect("Failed to convert to float") * log_det_i;
}
let c1 = compute_box_correction_c1(&samplesizes, p)?;
let chi_square_stat = m_statistic * (F::one() - c1);
if !chi_square_stat.is_finite() {
return Err(LinalgError::InvalidInputError(
"Box's M statistic is not finite. This may indicate numerical instability due to ill-conditioned covariance matrices.".to_string(),
));
}
let df = (k - 1) * p * (p + 1) / 2;
let p_value = chi_square_survival_function(chi_square_stat, df)?;
Ok(TestResult::new(
chi_square_stat,
p_value,
None,
significance_level,
Some(df),
))
}
#[allow(dead_code)]
pub fn mauchly_sphericity_test<F>(
data: &ArrayView2<F>,
significance_level: F,
) -> LinalgResult<TestResult<F>>
where
F: Float
+ Zero
+ One
+ Copy
+ std::fmt::Debug
+ scirs2_core::ndarray::ScalarOperand
+ scirs2_core::numeric::FromPrimitive
+ scirs2_core::numeric::NumAssign
+ std::iter::Sum
+ Send
+ Sync
+ 'static,
{
let n = data.nrows();
let p = data.ncols();
if n <= p {
return Err(LinalgError::InvalidInputError(format!(
"Need more samples than variables: {n} <= {p}"
)));
}
let cov = covariancematrix(data, Some(1))?;
let (eigenvals, _) = eigh(&cov.view(), None)?;
let geometric_mean = eigenvals
.iter()
.fold(F::one(), |acc, &val| acc * val)
.powf(F::one() / F::from(p).expect("Failed to convert to float"));
let arithmetic_mean = eigenvals.sum() / F::from(p).expect("Failed to convert to float");
let w_statistic =
(geometric_mean / arithmetic_mean).powf(F::from(p).expect("Failed to convert to float"));
let n_f = F::from(n - 1).expect("Failed to convert to float");
let _f = F::from(p * (p + 1) / 2 - 1).expect("Test: operation failed");
let chi_square_stat = -(n_f
- F::from(2 * p * p + p + 2).expect("Failed to convert to float")
/ F::from(6 * p).expect("Failed to convert to float"))
* w_statistic.ln();
let df = p * (p + 1) / 2 - 1;
let p_value = chi_square_survival_function(chi_square_stat, df)?;
Ok(TestResult::new(
w_statistic,
p_value,
None,
significance_level,
Some(df),
))
}
#[allow(dead_code)]
pub fn mardia_normality_test<F>(
data: &ArrayView2<F>,
significance_level: F,
) -> LinalgResult<(TestResult<F>, TestResult<F>)>
where
F: Float
+ Zero
+ One
+ Copy
+ std::fmt::Debug
+ scirs2_core::ndarray::ScalarOperand
+ scirs2_core::numeric::FromPrimitive
+ scirs2_core::numeric::NumAssign
+ std::iter::Sum
+ Send
+ Sync
+ 'static,
{
let n = data.nrows();
let p = data.ncols();
if n <= p {
return Err(LinalgError::InvalidInputError(format!(
"Need more samples than variables: {n} <= {p}"
)));
}
let mean = data.mean_axis(Axis(0)).expect("Test: operation failed");
let mut cov = covariancematrix(data, Some(1))?;
let reg_factor = F::from(1e-10).expect("Failed to convert constant to float");
for i in 0..cov.nrows() {
cov[[i, i]] += reg_factor;
}
let cov_inv = inv(&cov.view(), None)?;
let mut distances = Array1::zeros(n);
for i in 0..n {
let row = data.row(i);
let centered = &row - &mean;
let distance = centered.dot(&cov_inv).dot(¢ered);
distances[i] = distance;
}
let mut skewness_sum = F::zero();
for i in 0..n {
for j in 0..n {
let row_i = data.row(i);
let row_j = data.row(j);
let centered_i = &row_i - &mean;
let centered_j = &row_j - &mean;
let temp_i = centered_i.dot(&cov_inv);
let _temp_j = centered_j.dot(&cov_inv);
let cross_term = temp_i.dot(¢ered_j);
skewness_sum += cross_term.powi(3);
}
}
let skewness_stat = skewness_sum / (F::from(n).expect("Failed to convert to float").powi(2));
let kurtosis_sum = distances.iter().fold(F::zero(), |acc, &d| acc + d.powi(2));
let kurtosis_stat = kurtosis_sum / F::from(n).expect("Failed to convert to float");
let skewness_chi2 = F::from(n).expect("Failed to convert to float") * skewness_stat
/ F::from(6.0).expect("Failed to convert constant to float");
let kurtosis_z = (kurtosis_stat - F::from(p * (p + 2)).expect("Test: operation failed"))
/ (F::from(8 * p * (p + 2)).expect("Test: operation failed")
/ F::from(n).expect("Failed to convert to float"))
.sqrt();
let skewness_df = p * (p + 1) * (p + 2) / 6;
let skewness_p_value = chi_square_survival_function(skewness_chi2, skewness_df)?;
let kurtosis_p_value = F::from(2.0).expect("Failed to convert constant to float")
* standard_normal_survival_function(kurtosis_z.abs());
let skewness_result = TestResult::new(
skewness_chi2,
skewness_p_value,
None,
significance_level,
Some(skewness_df),
);
let kurtosis_result =
TestResult::new(kurtosis_z, kurtosis_p_value, None, significance_level, None);
Ok((skewness_result, kurtosis_result))
}
#[allow(dead_code)]
pub fn hotelling_t2_test<F>(
data: &ArrayView2<F>,
mu0: Option<&ArrayView1<F>>,
significance_level: F,
) -> LinalgResult<TestResult<F>>
where
F: Float
+ Zero
+ One
+ Copy
+ std::fmt::Debug
+ scirs2_core::ndarray::ScalarOperand
+ scirs2_core::numeric::FromPrimitive
+ scirs2_core::numeric::NumAssign
+ std::iter::Sum
+ Send
+ Sync
+ 'static,
{
let n = data.nrows();
let p = data.ncols();
if n <= p {
return Err(LinalgError::InvalidInputError(format!(
"Need more samples than variables: {n} <= {p}"
)));
}
let sample_mean = data.mean_axis(Axis(0)).expect("Test: operation failed");
let hypothesized_mean = match mu0 {
Some(mu) => {
if mu.len() != p {
return Err(LinalgError::ShapeError(format!(
"Hypothesized mean must have {} elements, got {}",
p,
mu.len()
)));
}
mu.to_owned()
}
None => Array1::zeros(p),
};
let diff = &sample_mean - &hypothesized_mean;
let mut cov = covariancematrix(data, Some(1))?;
let reg_factor = F::from(1e-10).expect("Failed to convert constant to float");
for i in 0..cov.nrows() {
cov[[i, i]] += reg_factor;
}
let cov_inv = inv(&cov.view(), None)?;
let t2_stat = F::from(n).expect("Failed to convert to float") * diff.dot(&cov_inv).dot(&diff);
let f_stat = t2_stat * F::from(n - p).expect("Failed to convert to float")
/ (F::from(n - 1).expect("Failed to convert to float")
* F::from(p).expect("Failed to convert to float"));
let df1 = p;
let df2 = n - p;
let p_value = f_survival_function(f_stat, df1, df2)?;
Ok(TestResult::new(
t2_stat,
p_value,
None,
significance_level,
Some(df1),
))
}
#[allow(dead_code)]
fn compute_box_correction_c1<F>(_samplesizes: &[usize], p: usize) -> LinalgResult<F>
where
F: Float + Zero + One + Copy + scirs2_core::numeric::FromPrimitive,
{
let k = _samplesizes.len();
let mut sum_inv = F::zero();
for &n_i in _samplesizes {
sum_inv = sum_inv + F::one() / F::from(n_i - 1).expect("Failed to convert to float");
}
let total_dof: usize = _samplesizes.iter().map(|&n| n - 1).sum();
let inv_total = F::one() / F::from(total_dof).expect("Failed to convert to float");
let numerator = F::from(2 * p * p + 3 * p - 1).expect("Failed to convert to float");
let denominator = F::from(6).expect("Failed to convert constant to float")
* F::from(p + 1).expect("Failed to convert to float")
* F::from(k - 1).expect("Failed to convert to float");
let c1 = (numerator / denominator) * (sum_inv - inv_total);
Ok(c1)
}
#[allow(dead_code)]
fn chi_square_survival_function<F>(x: F, df: usize) -> LinalgResult<F>
where
F: Float + Zero + One + Copy + scirs2_core::numeric::FromPrimitive,
{
if x <= F::zero() {
return Ok(F::one());
}
if df > 30 {
let z = (x - F::from(df).expect("Failed to convert to float"))
/ F::from(2 * df).expect("Failed to convert to float").sqrt();
return Ok(standard_normal_survival_function(z));
}
let approx = (-x / F::from(2.0).expect("Failed to convert constant to float")).exp();
Ok(approx.min(F::one()))
}
#[allow(dead_code)]
fn f_survival_function<F>(x: F, _df1: usize, df2: usize) -> LinalgResult<F>
where
F: Float + Zero + One + Copy + scirs2_core::numeric::FromPrimitive,
{
if x <= F::zero() {
return Ok(F::one());
}
let approx = F::one() / (F::one() + x);
Ok(approx)
}
#[allow(dead_code)]
fn standard_normal_survival_function<F>(z: F) -> F
where
F: Float + Zero + One + Copy + scirs2_core::numeric::FromPrimitive,
{
if z <= F::zero() {
return F::from(0.5).expect("Failed to convert constant to float");
}
let approx = (-z * z / F::from(2.0).expect("Failed to convert constant to float")).exp()
/ (z * F::from(2.0 * PI)
.expect("Failed to convert to float")
.sqrt());
approx.min(F::from(0.5).expect("Failed to convert constant to float"))
}
#[cfg(test)]
use scirs2_core::ndarray::array;
#[test]
#[allow(dead_code)]
fn test_hotelling_t2_test() {
let data = array![
[1.0, 2.1],
[2.1, 2.9],
[2.9, 4.1],
[4.1, 4.8],
[4.8, 6.2],
[0.5, 1.8],
[3.2, 3.7]
];
let result = hotelling_t2_test(&data.view(), None, 0.05).expect("Test: operation failed");
assert!(result.statistic.is_finite());
assert!(result.p_value.is_finite());
assert!(result.p_value >= 0.0 && result.p_value <= 1.0);
}
#[test]
#[allow(dead_code)]
fn test_mauchly_sphericity_test() {
let data = array![
[1.0, 0.0, 0.0],
[0.0, 1.0, 0.0],
[0.0, 0.0, 1.0],
[1.0, 1.0, 1.0],
[-1.0, -1.0, -1.0]
];
match mauchly_sphericity_test(&data.view(), 0.05) {
Ok(result) => {
assert!(result.statistic.is_finite());
assert!(result.p_value.is_finite());
assert!(result.p_value >= 0.0 && result.p_value <= 1.0);
}
Err(LinalgError::NotImplementedError(_)) => {
}
Err(e) => panic!("Unexpected error: {:?}", e),
}
}
#[test]
#[allow(dead_code)]
fn test_mardia_normality_test() {
let data = array![
[0.0, 0.0],
[1.0, 1.0],
[-1.0, -1.0],
[0.5, -0.5],
[-0.5, 0.5]
];
let (skewness_result, kurtosis_result) =
mardia_normality_test(&data.view(), 0.05).expect("Test: operation failed");
assert!(skewness_result.statistic.is_finite());
assert!(skewness_result.p_value.is_finite());
assert!(kurtosis_result.statistic.is_finite());
assert!(kurtosis_result.p_value.is_finite());
}
#[test]
#[allow(dead_code)]
fn test_box_m_test() {
let group1 = array![
[1.0, 0.1],
[2.1, 1.2],
[2.8, 1.9],
[4.1, 3.2],
[1.5, 0.8],
[3.2, 2.1]
];
let group2 = array![
[0.2, 1.1],
[1.1, 2.2],
[1.9, 2.8],
[3.2, 4.1],
[0.8, 1.9],
[2.5, 3.3]
];
let groups = vec![group1.view(), group2.view()];
let result = box_m_test(&groups, 0.05).expect("Test: operation failed");
assert!(result.statistic.is_finite());
assert!(result.p_value.is_finite());
assert!(result.p_value >= 0.0 && result.p_value <= 1.0);
}