scirs2-linalg 0.4.0

Linear algebra module for SciRS2 (scirs2-linalg)
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
//! Statistical tests for matrices
//!
//! This module provides statistical hypothesis tests that operate on matrices,
//! including tests for equality of covariance matrices, sphericity tests,
//! and multivariate normality tests.

use scirs2_core::ndarray::{Array1, Array2, ArrayView1, ArrayView2, Axis};
use scirs2_core::numeric::{Float, One, Zero};
use std::f64::consts::PI;

use crate::basic::{det, inv};
use crate::eigen::eigh;
use crate::error::{LinalgError, LinalgResult};
use crate::stats::covariance::covariancematrix;

/// Result of a statistical test
#[derive(Debug, Clone)]
pub struct TestResult<F: Float> {
    /// Test statistic value
    pub statistic: F,
    /// P-value of the test
    pub p_value: F,
    /// Critical value at specified significance level
    pub critical_value: Option<F>,
    /// Whether to reject the null hypothesis
    pub reject_null: bool,
    /// Degrees of freedom
    pub degrees_of_freedom: Option<usize>,
}

impl<F: Float> TestResult<F> {
    /// Create a new test result
    pub fn new(
        statistic: F,
        p_value: F,
        critical_value: Option<F>,
        significance_level: F,
        degrees_of_freedom: Option<usize>,
    ) -> Self {
        let reject_null = p_value < significance_level;
        Self {
            statistic,
            p_value,
            critical_value,
            reject_null,
            degrees_of_freedom,
        }
    }
}

/// Box's M test for equality of covariance matrices
///
/// Tests the null hypothesis that k groups have equal covariance matrices.
///
/// # Arguments
///
/// * `groups` - Vector of data matrices, each representing a group
/// * `significance_level` - Significance level for the test (e.g., 0.05)
///
/// # Returns
///
/// * Test result with chi-square statistic and p-value
#[allow(dead_code)]
pub fn box_m_test<F>(
    _groups: &[ArrayView2<F>],
    significance_level: F,
) -> LinalgResult<TestResult<F>>
where
    F: Float
        + Zero
        + One
        + Copy
        + std::fmt::Debug
        + scirs2_core::ndarray::ScalarOperand
        + scirs2_core::numeric::FromPrimitive
        + scirs2_core::numeric::NumAssign
        + std::iter::Sum
        + Send
        + Sync
        + 'static,
{
    if _groups.len() < 2 {
        return Err(LinalgError::InvalidInputError(
            "Need at least 2 _groups for Box's M test".to_string(),
        ));
    }

    let k = _groups.len();
    let p = _groups[0].ncols();

    // Check that all _groups have the same number of variables
    for (i, group) in _groups.iter().enumerate() {
        if group.ncols() != p {
            return Err(LinalgError::ShapeError(format!(
                "All _groups must have the same number of variables. Group 0 has {}, group {} has {}",
                p, i, group.ncols()
            )));
        }
    }

    // Compute sample sizes and covariance matrices for each group
    let mut samplesizes = Vec::with_capacity(k);
    let mut group_covs = Vec::with_capacity(k);
    let mut log_dets = Vec::with_capacity(k);

    for group in _groups {
        let n_i = group.nrows();
        if n_i <= p {
            return Err(LinalgError::InvalidInputError(format!(
                "Each group must have more samples than variables: {n_i} <= {p}"
            )));
        }

        samplesizes.push(n_i);
        let mut cov_i = covariancematrix(group, Some(1))?;

        // Add small regularization to diagonal for numerical stability
        let reg_factor = F::from(1e-10).expect("Failed to convert constant to float");
        for j in 0..cov_i.nrows() {
            cov_i[[j, j]] += reg_factor;
        }

        let det_i = det(&cov_i.view(), None)?;

        // Check for positive determinant before taking log
        if det_i <= F::zero() {
            return Err(LinalgError::InvalidInputError(format!(
                "Covariance matrix for group {} has non-positive determinant: {:?}. Consider increasing sample size or regularization.",
                _groups.iter().position(|g| std::ptr::eq(g.as_ptr(), group.as_ptr())).unwrap_or(0),
                det_i
            )));
        }

        let log_det_i = det_i.ln();
        group_covs.push(cov_i);
        log_dets.push(log_det_i);
    }

    // Compute pooled covariance matrix
    let total_dof: usize = samplesizes.iter().map(|&n| n - 1).sum();
    let mut pooled_cov = Array2::zeros((p, p));

    for (i, cov_i) in group_covs.iter().enumerate() {
        let weight = F::from(samplesizes[i] - 1).expect("Failed to convert to float")
            / F::from(total_dof).expect("Failed to convert to float");
        pooled_cov += &(cov_i * weight);
    }

    // Add regularization to pooled covariance matrix
    let reg_factor = F::from(1e-10).expect("Failed to convert constant to float");
    for i in 0..pooled_cov.nrows() {
        pooled_cov[[i, i]] += reg_factor;
    }

    let det_pooled = det::<F>(&pooled_cov.view(), None)?;

    // Check for positive determinant before taking log
    if det_pooled <= F::zero() {
        return Err(LinalgError::InvalidInputError(format!(
            "Pooled covariance matrix has non-positive determinant: {det_pooled:?}. Consider increasing sample sizes or regularization."
        )));
    }

    let log_det_pooled = det_pooled.ln();

    // Compute Box's M statistic
    let mut m_statistic = F::from(total_dof).expect("Failed to convert to float") * log_det_pooled;

    for (i, &log_det_i) in log_dets.iter().enumerate() {
        m_statistic -= F::from(samplesizes[i] - 1).expect("Failed to convert to float") * log_det_i;
    }

    // Convert to chi-square statistic using Box's correction
    let c1 = compute_box_correction_c1(&samplesizes, p)?;
    let chi_square_stat = m_statistic * (F::one() - c1);

    // Check for finite statistic
    if !chi_square_stat.is_finite() {
        return Err(LinalgError::InvalidInputError(
            "Box's M statistic is not finite. This may indicate numerical instability due to ill-conditioned covariance matrices.".to_string(),
        ));
    }

    // Degrees of freedom for chi-square distribution
    let df = (k - 1) * p * (p + 1) / 2;

    // Approximate p-value using chi-square distribution (simplified)
    let p_value = chi_square_survival_function(chi_square_stat, df)?;

    Ok(TestResult::new(
        chi_square_stat,
        p_value,
        None,
        significance_level,
        Some(df),
    ))
}

/// Mauchly's test of sphericity
///
/// Tests whether the covariance matrix has a spherical structure (all eigenvalues equal).
///
/// # Arguments
///
/// * `data` - Data matrix with samples as rows
/// * `significance_level` - Significance level for the test
///
/// # Returns
///
/// * Test result with W statistic and p-value
#[allow(dead_code)]
pub fn mauchly_sphericity_test<F>(
    data: &ArrayView2<F>,
    significance_level: F,
) -> LinalgResult<TestResult<F>>
where
    F: Float
        + Zero
        + One
        + Copy
        + std::fmt::Debug
        + scirs2_core::ndarray::ScalarOperand
        + scirs2_core::numeric::FromPrimitive
        + scirs2_core::numeric::NumAssign
        + std::iter::Sum
        + Send
        + Sync
        + 'static,
{
    let n = data.nrows();
    let p = data.ncols();

    if n <= p {
        return Err(LinalgError::InvalidInputError(format!(
            "Need more samples than variables: {n} <= {p}"
        )));
    }

    // Compute sample covariance matrix
    let cov = covariancematrix(data, Some(1))?;

    // Compute eigenvalues
    let (eigenvals, _) = eigh(&cov.view(), None)?;

    // Mauchly's W statistic
    let geometric_mean = eigenvals
        .iter()
        .fold(F::one(), |acc, &val| acc * val)
        .powf(F::one() / F::from(p).expect("Failed to convert to float"));
    let arithmetic_mean = eigenvals.sum() / F::from(p).expect("Failed to convert to float");

    let w_statistic =
        (geometric_mean / arithmetic_mean).powf(F::from(p).expect("Failed to convert to float"));

    // Convert to chi-square using Mauchly's transformation
    let n_f = F::from(n - 1).expect("Failed to convert to float");
    let _f = F::from(p * (p + 1) / 2 - 1).expect("Test: operation failed");
    let chi_square_stat = -(n_f
        - F::from(2 * p * p + p + 2).expect("Failed to convert to float")
            / F::from(6 * p).expect("Failed to convert to float"))
        * w_statistic.ln();

    let df = p * (p + 1) / 2 - 1;
    let p_value = chi_square_survival_function(chi_square_stat, df)?;

    Ok(TestResult::new(
        w_statistic,
        p_value,
        None,
        significance_level,
        Some(df),
    ))
}

/// Mardia's test for multivariate normality
///
/// Tests whether data follows a multivariate normal distribution using
/// measures of multivariate skewness and kurtosis.
///
/// # Arguments
///
/// * `data` - Data matrix with samples as rows
/// * `significance_level` - Significance level for the test
///
/// # Returns
///
/// * Test result containing both skewness and kurtosis components
#[allow(dead_code)]
pub fn mardia_normality_test<F>(
    data: &ArrayView2<F>,
    significance_level: F,
) -> LinalgResult<(TestResult<F>, TestResult<F>)>
where
    F: Float
        + Zero
        + One
        + Copy
        + std::fmt::Debug
        + scirs2_core::ndarray::ScalarOperand
        + scirs2_core::numeric::FromPrimitive
        + scirs2_core::numeric::NumAssign
        + std::iter::Sum
        + Send
        + Sync
        + 'static,
{
    let n = data.nrows();
    let p = data.ncols();

    if n <= p {
        return Err(LinalgError::InvalidInputError(format!(
            "Need more samples than variables: {n} <= {p}"
        )));
    }

    // Compute sample mean and covariance with regularization
    let mean = data.mean_axis(Axis(0)).expect("Test: operation failed");
    let mut cov = covariancematrix(data, Some(1))?;

    // Add small regularization to diagonal for numerical stability
    let reg_factor = F::from(1e-10).expect("Failed to convert constant to float");
    for i in 0..cov.nrows() {
        cov[[i, i]] += reg_factor;
    }

    let cov_inv = inv(&cov.view(), None)?;

    // Compute Mahalanobis distances
    let mut distances = Array1::zeros(n);
    for i in 0..n {
        let row = data.row(i);
        let centered = &row - &mean;
        let distance = centered.dot(&cov_inv).dot(&centered);
        distances[i] = distance;
    }

    // Mardia's skewness statistic
    let mut skewness_sum = F::zero();
    for i in 0..n {
        for j in 0..n {
            let row_i = data.row(i);
            let row_j = data.row(j);
            let centered_i = &row_i - &mean;
            let centered_j = &row_j - &mean;

            let temp_i = centered_i.dot(&cov_inv);
            let _temp_j = centered_j.dot(&cov_inv);

            let cross_term = temp_i.dot(&centered_j);
            skewness_sum += cross_term.powi(3);
        }
    }

    let skewness_stat = skewness_sum / (F::from(n).expect("Failed to convert to float").powi(2));

    // Mardia's kurtosis statistic
    let kurtosis_sum = distances.iter().fold(F::zero(), |acc, &d| acc + d.powi(2));
    let kurtosis_stat = kurtosis_sum / F::from(n).expect("Failed to convert to float");

    // Convert to test statistics
    let skewness_chi2 = F::from(n).expect("Failed to convert to float") * skewness_stat
        / F::from(6.0).expect("Failed to convert constant to float");
    let kurtosis_z = (kurtosis_stat - F::from(p * (p + 2)).expect("Test: operation failed"))
        / (F::from(8 * p * (p + 2)).expect("Test: operation failed")
            / F::from(n).expect("Failed to convert to float"))
        .sqrt();

    // Degrees of freedom and p-values
    let skewness_df = p * (p + 1) * (p + 2) / 6;
    let skewness_p_value = chi_square_survival_function(skewness_chi2, skewness_df)?;

    // For kurtosis, use standard normal approximation
    let kurtosis_p_value = F::from(2.0).expect("Failed to convert constant to float")
        * standard_normal_survival_function(kurtosis_z.abs());

    let skewness_result = TestResult::new(
        skewness_chi2,
        skewness_p_value,
        None,
        significance_level,
        Some(skewness_df),
    );

    let kurtosis_result =
        TestResult::new(kurtosis_z, kurtosis_p_value, None, significance_level, None);

    Ok((skewness_result, kurtosis_result))
}

/// Hotelling's T² test for a mean vector
///
/// Tests whether the sample mean equals a specified value.
///
/// # Arguments
///
/// * `data` - Data matrix with samples as rows
/// * `mu0` - Hypothesized mean vector (if None, tests against zero vector)
/// * `significance_level` - Significance level for the test
///
/// # Returns
///
/// * Test result with T² statistic and F-distributed p-value
#[allow(dead_code)]
pub fn hotelling_t2_test<F>(
    data: &ArrayView2<F>,
    mu0: Option<&ArrayView1<F>>,
    significance_level: F,
) -> LinalgResult<TestResult<F>>
where
    F: Float
        + Zero
        + One
        + Copy
        + std::fmt::Debug
        + scirs2_core::ndarray::ScalarOperand
        + scirs2_core::numeric::FromPrimitive
        + scirs2_core::numeric::NumAssign
        + std::iter::Sum
        + Send
        + Sync
        + 'static,
{
    let n = data.nrows();
    let p = data.ncols();

    if n <= p {
        return Err(LinalgError::InvalidInputError(format!(
            "Need more samples than variables: {n} <= {p}"
        )));
    }

    // Compute sample mean
    let sample_mean = data.mean_axis(Axis(0)).expect("Test: operation failed");

    // Use zero vector if no hypothesized mean is provided
    let hypothesized_mean = match mu0 {
        Some(mu) => {
            if mu.len() != p {
                return Err(LinalgError::ShapeError(format!(
                    "Hypothesized mean must have {} elements, got {}",
                    p,
                    mu.len()
                )));
            }
            mu.to_owned()
        }
        None => Array1::zeros(p),
    };

    // Compute difference
    let diff = &sample_mean - &hypothesized_mean;

    // Compute sample covariance matrix with regularization for numerical stability
    let mut cov = covariancematrix(data, Some(1))?;

    // Add small regularization to diagonal for numerical stability
    let reg_factor = F::from(1e-10).expect("Failed to convert constant to float");
    for i in 0..cov.nrows() {
        cov[[i, i]] += reg_factor;
    }

    let cov_inv = inv(&cov.view(), None)?;

    // Compute Hotelling's T² statistic
    let t2_stat = F::from(n).expect("Failed to convert to float") * diff.dot(&cov_inv).dot(&diff);

    // Convert to F-statistic
    let f_stat = t2_stat * F::from(n - p).expect("Failed to convert to float")
        / (F::from(n - 1).expect("Failed to convert to float")
            * F::from(p).expect("Failed to convert to float"));

    // Degrees of freedom for F-distribution
    let df1 = p;
    let df2 = n - p;

    // Approximate p-value using F-distribution (simplified)
    let p_value = f_survival_function(f_stat, df1, df2)?;

    Ok(TestResult::new(
        t2_stat,
        p_value,
        None,
        significance_level,
        Some(df1),
    ))
}

// Helper functions for computing distribution functions (simplified implementations)

#[allow(dead_code)]
fn compute_box_correction_c1<F>(_samplesizes: &[usize], p: usize) -> LinalgResult<F>
where
    F: Float + Zero + One + Copy + scirs2_core::numeric::FromPrimitive,
{
    let k = _samplesizes.len();
    let mut sum_inv = F::zero();

    for &n_i in _samplesizes {
        sum_inv = sum_inv + F::one() / F::from(n_i - 1).expect("Failed to convert to float");
    }

    let total_dof: usize = _samplesizes.iter().map(|&n| n - 1).sum();
    let inv_total = F::one() / F::from(total_dof).expect("Failed to convert to float");

    let numerator = F::from(2 * p * p + 3 * p - 1).expect("Failed to convert to float");
    let denominator = F::from(6).expect("Failed to convert constant to float")
        * F::from(p + 1).expect("Failed to convert to float")
        * F::from(k - 1).expect("Failed to convert to float");
    let c1 = (numerator / denominator) * (sum_inv - inv_total);

    Ok(c1)
}

#[allow(dead_code)]
fn chi_square_survival_function<F>(x: F, df: usize) -> LinalgResult<F>
where
    F: Float + Zero + One + Copy + scirs2_core::numeric::FromPrimitive,
{
    // Simplified approximation for chi-square survival function
    // In practice, you'd use a proper implementation
    if x <= F::zero() {
        return Ok(F::one());
    }

    // Very rough approximation using normal approximation for large df
    if df > 30 {
        let z = (x - F::from(df).expect("Failed to convert to float"))
            / F::from(2 * df).expect("Failed to convert to float").sqrt();
        return Ok(standard_normal_survival_function(z));
    }

    // For small df, use a simple approximation
    let approx = (-x / F::from(2.0).expect("Failed to convert constant to float")).exp();
    Ok(approx.min(F::one()))
}

#[allow(dead_code)]
fn f_survival_function<F>(x: F, _df1: usize, df2: usize) -> LinalgResult<F>
where
    F: Float + Zero + One + Copy + scirs2_core::numeric::FromPrimitive,
{
    // Simplified approximation for F survival function
    if x <= F::zero() {
        return Ok(F::one());
    }

    // Very rough approximation
    let approx = F::one() / (F::one() + x);
    Ok(approx)
}

#[allow(dead_code)]
fn standard_normal_survival_function<F>(z: F) -> F
where
    F: Float + Zero + One + Copy + scirs2_core::numeric::FromPrimitive,
{
    // Simplified approximation for standard normal survival function
    if z <= F::zero() {
        return F::from(0.5).expect("Failed to convert constant to float");
    }

    // Very rough approximation using complementary error function approximation
    let approx = (-z * z / F::from(2.0).expect("Failed to convert constant to float")).exp()
        / (z * F::from(2.0 * PI)
            .expect("Failed to convert to float")
            .sqrt());
    approx.min(F::from(0.5).expect("Failed to convert constant to float"))
}

#[cfg(test)]
use scirs2_core::ndarray::array;

#[test]
#[allow(dead_code)]
fn test_hotelling_t2_test() {
    // Test data with some variation to avoid singular covariance matrix
    let data = array![
        [1.0, 2.1],
        [2.1, 2.9],
        [2.9, 4.1],
        [4.1, 4.8],
        [4.8, 6.2],
        [0.5, 1.8],
        [3.2, 3.7]
    ];

    let result = hotelling_t2_test(&data.view(), None, 0.05).expect("Test: operation failed");

    // Should produce finite statistics
    assert!(result.statistic.is_finite());
    assert!(result.p_value.is_finite());
    assert!(result.p_value >= 0.0 && result.p_value <= 1.0);
}

#[test]
#[allow(dead_code)]
fn test_mauchly_sphericity_test() {
    // Test with identity-like covariance (should not reject sphericity)
    let data = array![
        [1.0, 0.0, 0.0],
        [0.0, 1.0, 0.0],
        [0.0, 0.0, 1.0],
        [1.0, 1.0, 1.0],
        [-1.0, -1.0, -1.0]
    ];

    match mauchly_sphericity_test(&data.view(), 0.05) {
        Ok(result) => {
            assert!(result.statistic.is_finite());
            assert!(result.p_value.is_finite());
            assert!(result.p_value >= 0.0 && result.p_value <= 1.0);
        }
        Err(LinalgError::NotImplementedError(_)) => {
            // Symmetric eigenvalue decomposition not implemented for this size
            // This is acceptable for this test
        }
        Err(e) => panic!("Unexpected error: {:?}", e),
    }
}

#[test]
#[allow(dead_code)]
fn test_mardia_normality_test() {
    // Simple test data
    let data = array![
        [0.0, 0.0],
        [1.0, 1.0],
        [-1.0, -1.0],
        [0.5, -0.5],
        [-0.5, 0.5]
    ];

    let (skewness_result, kurtosis_result) =
        mardia_normality_test(&data.view(), 0.05).expect("Test: operation failed");

    assert!(skewness_result.statistic.is_finite());
    assert!(skewness_result.p_value.is_finite());
    assert!(kurtosis_result.statistic.is_finite());
    assert!(kurtosis_result.p_value.is_finite());
}

#[test]
#[allow(dead_code)]
fn test_box_m_test() {
    // Create two groups with more samples and variation to avoid singular matrices
    let group1 = array![
        [1.0, 0.1],
        [2.1, 1.2],
        [2.8, 1.9],
        [4.1, 3.2],
        [1.5, 0.8],
        [3.2, 2.1]
    ];

    let group2 = array![
        [0.2, 1.1],
        [1.1, 2.2],
        [1.9, 2.8],
        [3.2, 4.1],
        [0.8, 1.9],
        [2.5, 3.3]
    ];

    let groups = vec![group1.view(), group2.view()];
    let result = box_m_test(&groups, 0.05).expect("Test: operation failed");

    assert!(result.statistic.is_finite());
    assert!(result.p_value.is_finite());
    assert!(result.p_value >= 0.0 && result.p_value <= 1.0);
}