fdars_core/spm/
stats.rs

1//! Hotelling T-squared and SPE (Squared Prediction Error) statistics.
2//!
3//! These are the two fundamental monitoring statistics for functional
4//! statistical process control: T-squared captures systematic variation
5//! in the principal component subspace, while SPE captures residual
6//! variation outside it.
7//!
8//! # Finite-sample distribution
9//!
10//! For finite calibration samples of size *n*, T² follows
11//! `(n·ncomp/(n−ncomp))·F(ncomp, n−ncomp)` exactly under Gaussian scores.
12//! The `χ²(ncomp)` limit is the large-sample limit as *n* → ∞.
13//!
14//! # Numerical stability
15//!
16//! If the ratio `max(eigenvalue)/min(eigenvalue)` exceeds ~10^6, the T²
17//! statistic becomes numerically sensitive. Use [`hotelling_t2_regularized`]
18//! in such cases.
19//!
20//! # Quadrature
21//!
22//! The Simpson's rule quadrature used for SPE integration has error
23//! O(h^4 * max|f''''|) where h is the grid spacing, giving excellent
24//! accuracy for smooth functional data.
25//!
26//! # References
27//!
28//! - Hotelling, H. (1947). Multivariate quality control. *Techniques of
29//!   Statistical Analysis*, pp. 111–113.
30//! - Bersimis, S., Psarakis, S. & Panaretos, J. (2007). Multivariate
31//!   statistical process control charts: an overview. *Quality and
32//!   Reliability Engineering International*, 23(5), 517–543. §2.1,
33//!   pp. 519–522.
34//!
35//! # Assumptions
36//!
37//! The statistics in this module assume the FPCA scores are approximately
38//! uncorrelated (diagonal covariance). This holds by construction when
39//! eigenvalues come from PCA/SVD. For non-PCA score vectors, use the full
40//! covariance Mahalanobis distance instead.
41
42use crate::error::FdarError;
43use crate::helpers::simpsons_weights;
44use crate::matrix::FdMatrix;
45
46/// Compute Hotelling T-squared statistic for each observation.
47///
48/// T-squared_i = sum_{l=1}^{L} scores_{i,l}^2 / eigenvalues_l
49///
50/// Under in-control conditions with Gaussian scores, T² follows
51/// approximately a chi²(ncomp) distribution (Hotelling, 1947, pp. 111–113).
52/// For finite calibration samples of size *n*, T² follows
53/// `(n·ncomp/(n−ncomp))·F(ncomp, n−ncomp)` exactly under Gaussian scores.
54/// The `χ²(ncomp)` limit is the large-sample limit as *n* → ∞.
55///
56/// Use [`t2_control_limit`](super::control::t2_control_limit) to obtain the
57/// corresponding upper control limit.
58///
59/// # Numerical stability
60///
61/// If the ratio `max(eigenvalue)/min(eigenvalue)` exceeds ~10^6, the T²
62/// statistic becomes numerically sensitive. Use [`hotelling_t2_regularized`]
63/// in such cases.
64///
65/// # Arguments
66/// * `scores` - FPC score matrix (n x ncomp)
67/// * `eigenvalues` - Eigenvalues (length ncomp): sv_l^2 / (n-1)
68///
69/// # Example
70///
71/// ```
72/// use fdars_core::matrix::FdMatrix;
73/// use fdars_core::spm::stats::hotelling_t2;
74/// let scores = FdMatrix::from_column_major(vec![1.0, 2.0, 3.0, 0.5, 1.0, 1.5], 3, 2).unwrap();
75/// let eigenvalues = vec![2.0, 1.0];
76/// let t2 = hotelling_t2(&scores, &eigenvalues).unwrap();
77/// assert_eq!(t2.len(), 3);
78/// assert!((t2[0] - 0.75).abs() < 1e-10); // 1²/2 + 0.5²/1
79/// ```
80///
81/// # Errors
82///
83/// Returns [`FdarError::InvalidDimension`] if scores columns != eigenvalues length.
84/// Returns [`FdarError::InvalidParameter`] if any eigenvalue is non-positive.
85#[must_use = "statistic result should not be discarded"]
86pub fn hotelling_t2(scores: &FdMatrix, eigenvalues: &[f64]) -> Result<Vec<f64>, FdarError> {
87    let (n, ncomp) = scores.shape();
88    if ncomp != eigenvalues.len() {
89        return Err(FdarError::InvalidDimension {
90            parameter: "eigenvalues",
91            expected: format!("{ncomp} (matching scores columns)"),
92            actual: format!("{}", eigenvalues.len()),
93        });
94    }
95    for (l, &ev) in eigenvalues.iter().enumerate() {
96        if ev <= 0.0 {
97            return Err(FdarError::InvalidParameter {
98                parameter: "eigenvalues",
99                message: format!("eigenvalue[{l}] = {ev} must be positive"),
100            });
101        }
102    }
103
104    let t2: Vec<f64> = (0..n)
105        .map(|i| {
106            let mut sum = 0.0;
107            for l in 0..ncomp {
108                let s = scores[(i, l)];
109                sum += s * s / eigenvalues[l];
110            }
111            sum
112        })
113        .collect();
114
115    Ok(t2)
116}
117
118/// Compute Hotelling T-squared with eigenvalue regularization.
119///
120/// Like [`hotelling_t2`], but applies a floor to eigenvalues to prevent
121/// numerical instability from near-zero values. Useful when some
122/// eigenvalues are very small (< 1e-8) which would cause numerical
123/// instability in standard T². The epsilon floor prevents division by
124/// near-zero eigenvalues.
125///
126/// # Condition number
127///
128/// If the ratio `max(eigenvalue)/min(eigenvalue)` exceeds ~10^6, the
129/// standard T² statistic becomes numerically sensitive. This function
130/// clamps small eigenvalues to `epsilon`, effectively bounding the
131/// condition number at `max(eigenvalue)/epsilon`.
132///
133/// Choosing epsilon: set epsilon approximately 1e-2 times the minimum
134/// eigenvalue to regularize without substantially altering the statistic.
135/// The default regularization in `spm_monitor` uses this approach. For
136/// manual use, epsilon = 1e-6 is a safe general-purpose default that
137/// handles eigenvalue ratios up to 1e6.
138///
139/// # Arguments
140/// * `scores` - Score matrix (n x ncomp)
141/// * `eigenvalues` - Eigenvalues (length ncomp)
142/// * `epsilon` - Minimum eigenvalue floor (values below this are set to epsilon)
143///
144/// # Errors
145///
146/// Returns [`FdarError::InvalidDimension`] if shapes are inconsistent.
147/// Returns [`FdarError::InvalidParameter`] if epsilon is non-positive.
148#[must_use = "statistic result should not be discarded"]
149pub fn hotelling_t2_regularized(
150    scores: &FdMatrix,
151    eigenvalues: &[f64],
152    epsilon: f64,
153) -> Result<Vec<f64>, FdarError> {
154    if epsilon <= 0.0 {
155        return Err(FdarError::InvalidParameter {
156            parameter: "epsilon",
157            message: format!("epsilon must be positive, got {epsilon}"),
158        });
159    }
160    let (n, ncomp) = scores.shape();
161    if ncomp != eigenvalues.len() {
162        return Err(FdarError::InvalidDimension {
163            parameter: "eigenvalues",
164            expected: format!("{ncomp}"),
165            actual: format!("{}", eigenvalues.len()),
166        });
167    }
168
169    let mut t2 = vec![0.0; n];
170    for i in 0..n {
171        for l in 0..ncomp {
172            let ev = eigenvalues[l].max(epsilon);
173            t2[i] += scores[(i, l)] * scores[(i, l)] / ev;
174        }
175    }
176    Ok(t2)
177}
178
179/// Compute SPE (Squared Prediction Error) for univariate functional data.
180///
181/// SPE_i = integral (x_centered_i(t) - x_reconstructed_i(t))^2 w(t) dt
182///
183/// Requires `argvals` to be sorted in ascending order with at least 3
184/// points for Simpson's rule integration. Non-uniform spacing is handled
185/// correctly by the quadrature weights.
186///
187/// # Quadrature accuracy
188///
189/// The Simpson's rule quadrature has error O(h^4 * max|f''''|) where h is
190/// the grid spacing, giving excellent accuracy for smooth functional data
191/// (Bersimis et al., 2007, §2.1, pp. 519–522).
192///
193/// # Arguments
194/// * `centered` - Centered functional data (n x m)
195/// * `reconstructed` - Reconstructed data from FPCA (n x m), already centered (mean subtracted)
196/// * `argvals` - Grid points (length m)
197///
198/// # Example
199///
200/// ```
201/// use fdars_core::matrix::FdMatrix;
202/// use fdars_core::spm::stats::spe_univariate;
203/// let centered = FdMatrix::from_column_major(vec![1.0, 0.0, 0.0, 1.0], 2, 2).unwrap();
204/// let reconstructed = FdMatrix::from_column_major(vec![0.0; 4], 2, 2).unwrap();
205/// let argvals = vec![0.0, 1.0];
206/// let spe = spe_univariate(&centered, &reconstructed, &argvals).unwrap();
207/// assert_eq!(spe.len(), 2);
208/// assert!(spe[0] > 0.0);
209/// ```
210///
211/// # Errors
212///
213/// Returns [`FdarError::InvalidDimension`] if shapes do not match.
214#[must_use = "statistic result should not be discarded"]
215pub fn spe_univariate(
216    centered: &FdMatrix,
217    reconstructed: &FdMatrix,
218    argvals: &[f64],
219) -> Result<Vec<f64>, FdarError> {
220    let (n, m) = centered.shape();
221    if reconstructed.shape() != (n, m) {
222        return Err(FdarError::InvalidDimension {
223            parameter: "reconstructed",
224            expected: format!("{n}x{m}"),
225            actual: format!("{}x{}", reconstructed.nrows(), reconstructed.ncols()),
226        });
227    }
228    if argvals.len() != m {
229        return Err(FdarError::InvalidDimension {
230            parameter: "argvals",
231            expected: format!("{m}"),
232            actual: format!("{}", argvals.len()),
233        });
234    }
235
236    let weights = simpsons_weights(argvals);
237
238    let spe: Vec<f64> = (0..n)
239        .map(|i| {
240            let mut sum = 0.0;
241            for j in 0..m {
242                let diff = centered[(i, j)] - reconstructed[(i, j)];
243                sum += diff * diff * weights[j];
244            }
245            sum
246        })
247        .collect();
248
249    Ok(spe)
250}
251
252/// Compute SPE for multivariate functional data.
253///
254/// Sum of per-variable integrated squared differences, each variable using
255/// its own grid-specific integration weights. Each entry in `argvals_list`
256/// must be sorted in ascending order with at least 3 points for Simpson's
257/// rule integration. Non-uniform spacing is handled correctly by the
258/// quadrature weights.
259///
260/// # Quadrature accuracy
261///
262/// Each variable's contribution uses Simpson's rule with error
263/// O(h^4 * max|f''''|) where h is the per-variable grid spacing.
264///
265/// # Arguments
266/// * `standardized_vars` - Per-variable standardized centered data (each n x m_p)
267/// * `reconstructed_vars` - Per-variable reconstructed data (each n x m_p)
268/// * `argvals_list` - Per-variable grid points
269///
270/// # Errors
271///
272/// Returns [`FdarError::InvalidDimension`] if the number of variables is inconsistent
273/// or any shapes do not match.
274#[must_use = "statistic result should not be discarded"]
275pub fn spe_multivariate(
276    standardized_vars: &[&FdMatrix],
277    reconstructed_vars: &[&FdMatrix],
278    argvals_list: &[&[f64]],
279) -> Result<Vec<f64>, FdarError> {
280    let p = standardized_vars.len();
281    if reconstructed_vars.len() != p || argvals_list.len() != p {
282        return Err(FdarError::InvalidDimension {
283            parameter: "variables",
284            expected: format!("{p} variables for all arguments"),
285            actual: format!(
286                "std={}, recon={}, argvals={}",
287                standardized_vars.len(),
288                reconstructed_vars.len(),
289                argvals_list.len()
290            ),
291        });
292    }
293
294    if p == 0 {
295        return Ok(Vec::new());
296    }
297
298    let n = standardized_vars[0].nrows();
299
300    let mut spe = vec![0.0; n];
301
302    for v in 0..p {
303        let std_v = standardized_vars[v];
304        let rec_v = reconstructed_vars[v];
305        let argvals = argvals_list[v];
306
307        if std_v.nrows() != n || rec_v.nrows() != n {
308            return Err(FdarError::InvalidDimension {
309                parameter: "variables",
310                expected: format!("{n} rows for variable {v}"),
311                actual: format!("std={}, recon={}", std_v.nrows(), rec_v.nrows()),
312            });
313        }
314
315        let m_v = std_v.ncols();
316        if rec_v.ncols() != m_v || argvals.len() != m_v {
317            return Err(FdarError::InvalidDimension {
318                parameter: "argvals_list",
319                expected: format!("{m_v} for variable {v}"),
320                actual: format!(
321                    "recon_cols={}, argvals_len={}",
322                    rec_v.ncols(),
323                    argvals.len()
324                ),
325            });
326        }
327
328        let weights = simpsons_weights(argvals);
329
330        for i in 0..n {
331            let mut var_spe = 0.0;
332            for j in 0..m_v {
333                let diff = std_v[(i, j)] - rec_v[(i, j)];
334                var_spe += diff * diff * weights[j];
335            }
336            spe[i] += var_spe;
337        }
338    }
339
340    Ok(spe)
341}
fdars_core/spm/stats.rs

fdars_core/spm/
stats.rs