fdars_core/spm/stats.rs
1//! Hotelling T-squared and SPE (Squared Prediction Error) statistics.
2//!
3//! These are the two fundamental monitoring statistics for functional
4//! statistical process control: T-squared captures systematic variation
5//! in the principal component subspace, while SPE captures residual
6//! variation outside it.
7//!
8//! # Finite-sample distribution
9//!
10//! For finite calibration samples of size *n*, T² follows
11//! `(n·ncomp/(n−ncomp))·F(ncomp, n−ncomp)` exactly under Gaussian scores.
12//! The `χ²(ncomp)` limit is the large-sample limit as *n* → ∞.
13//!
14//! # Numerical stability
15//!
16//! If the ratio `max(eigenvalue)/min(eigenvalue)` exceeds ~10^6, the T²
17//! statistic becomes numerically sensitive. Use [`hotelling_t2_regularized`]
18//! in such cases.
19//!
20//! # Quadrature
21//!
22//! The Simpson's rule quadrature used for SPE integration has error
23//! O(h^4 * max|f''''|) where h is the grid spacing, giving excellent
24//! accuracy for smooth functional data.
25//!
26//! # References
27//!
28//! - Hotelling, H. (1947). Multivariate quality control. *Techniques of
29//! Statistical Analysis*, pp. 111–113.
30//! - Bersimis, S., Psarakis, S. & Panaretos, J. (2007). Multivariate
31//! statistical process control charts: an overview. *Quality and
32//! Reliability Engineering International*, 23(5), 517–543. §2.1,
33//! pp. 519–522.
34//!
35//! # Assumptions
36//!
37//! The statistics in this module assume the FPCA scores are approximately
38//! uncorrelated (diagonal covariance). This holds by construction when
39//! eigenvalues come from PCA/SVD. For non-PCA score vectors, use the full
40//! covariance Mahalanobis distance instead.
41
42use crate::error::FdarError;
43use crate::helpers::simpsons_weights;
44use crate::matrix::FdMatrix;
45
46/// Compute Hotelling T-squared statistic for each observation.
47///
48/// T-squared_i = sum_{l=1}^{L} scores_{i,l}^2 / eigenvalues_l
49///
50/// Under in-control conditions with Gaussian scores, T² follows
51/// approximately a chi²(ncomp) distribution (Hotelling, 1947, pp. 111–113).
52/// For finite calibration samples of size *n*, T² follows
53/// `(n·ncomp/(n−ncomp))·F(ncomp, n−ncomp)` exactly under Gaussian scores.
54/// The `χ²(ncomp)` limit is the large-sample limit as *n* → ∞.
55///
56/// Use [`t2_control_limit`](super::control::t2_control_limit) to obtain the
57/// corresponding upper control limit.
58///
59/// # Numerical stability
60///
61/// If the ratio `max(eigenvalue)/min(eigenvalue)` exceeds ~10^6, the T²
62/// statistic becomes numerically sensitive. Use [`hotelling_t2_regularized`]
63/// in such cases.
64///
65/// # Arguments
66/// * `scores` - FPC score matrix (n x ncomp)
67/// * `eigenvalues` - Eigenvalues (length ncomp): sv_l^2 / (n-1)
68///
69/// # Example
70///
71/// ```
72/// use fdars_core::matrix::FdMatrix;
73/// use fdars_core::spm::stats::hotelling_t2;
74/// let scores = FdMatrix::from_column_major(vec![1.0, 2.0, 3.0, 0.5, 1.0, 1.5], 3, 2).unwrap();
75/// let eigenvalues = vec![2.0, 1.0];
76/// let t2 = hotelling_t2(&scores, &eigenvalues).unwrap();
77/// assert_eq!(t2.len(), 3);
78/// assert!((t2[0] - 0.75).abs() < 1e-10); // 1²/2 + 0.5²/1
79/// ```
80///
81/// # Errors
82///
83/// Returns [`FdarError::InvalidDimension`] if scores columns != eigenvalues length.
84/// Returns [`FdarError::InvalidParameter`] if any eigenvalue is non-positive.
85#[must_use = "statistic result should not be discarded"]
86pub fn hotelling_t2(scores: &FdMatrix, eigenvalues: &[f64]) -> Result<Vec<f64>, FdarError> {
87 let (n, ncomp) = scores.shape();
88 if ncomp != eigenvalues.len() {
89 return Err(FdarError::InvalidDimension {
90 parameter: "eigenvalues",
91 expected: format!("{ncomp} (matching scores columns)"),
92 actual: format!("{}", eigenvalues.len()),
93 });
94 }
95 for (l, &ev) in eigenvalues.iter().enumerate() {
96 if ev <= 0.0 {
97 return Err(FdarError::InvalidParameter {
98 parameter: "eigenvalues",
99 message: format!("eigenvalue[{l}] = {ev} must be positive"),
100 });
101 }
102 }
103
104 let t2: Vec<f64> = (0..n)
105 .map(|i| {
106 let mut sum = 0.0;
107 for l in 0..ncomp {
108 let s = scores[(i, l)];
109 sum += s * s / eigenvalues[l];
110 }
111 sum
112 })
113 .collect();
114
115 Ok(t2)
116}
117
118/// Compute Hotelling T-squared with eigenvalue regularization.
119///
120/// Like [`hotelling_t2`], but applies a floor to eigenvalues to prevent
121/// numerical instability from near-zero values. Useful when some
122/// eigenvalues are very small (< 1e-8) which would cause numerical
123/// instability in standard T². The epsilon floor prevents division by
124/// near-zero eigenvalues.
125///
126/// # Condition number
127///
128/// If the ratio `max(eigenvalue)/min(eigenvalue)` exceeds ~10^6, the
129/// standard T² statistic becomes numerically sensitive. This function
130/// clamps small eigenvalues to `epsilon`, effectively bounding the
131/// condition number at `max(eigenvalue)/epsilon`.
132///
133/// Choosing epsilon: set epsilon approximately 1e-2 times the minimum
134/// eigenvalue to regularize without substantially altering the statistic.
135/// The default regularization in `spm_monitor` uses this approach. For
136/// manual use, epsilon = 1e-6 is a safe general-purpose default that
137/// handles eigenvalue ratios up to 1e6.
138///
139/// # Arguments
140/// * `scores` - Score matrix (n x ncomp)
141/// * `eigenvalues` - Eigenvalues (length ncomp)
142/// * `epsilon` - Minimum eigenvalue floor (values below this are set to epsilon)
143///
144/// # Errors
145///
146/// Returns [`FdarError::InvalidDimension`] if shapes are inconsistent.
147/// Returns [`FdarError::InvalidParameter`] if epsilon is non-positive.
148#[must_use = "statistic result should not be discarded"]
149pub fn hotelling_t2_regularized(
150 scores: &FdMatrix,
151 eigenvalues: &[f64],
152 epsilon: f64,
153) -> Result<Vec<f64>, FdarError> {
154 if epsilon <= 0.0 {
155 return Err(FdarError::InvalidParameter {
156 parameter: "epsilon",
157 message: format!("epsilon must be positive, got {epsilon}"),
158 });
159 }
160 let (n, ncomp) = scores.shape();
161 if ncomp != eigenvalues.len() {
162 return Err(FdarError::InvalidDimension {
163 parameter: "eigenvalues",
164 expected: format!("{ncomp}"),
165 actual: format!("{}", eigenvalues.len()),
166 });
167 }
168
169 let mut t2 = vec![0.0; n];
170 for i in 0..n {
171 for l in 0..ncomp {
172 let ev = eigenvalues[l].max(epsilon);
173 t2[i] += scores[(i, l)] * scores[(i, l)] / ev;
174 }
175 }
176 Ok(t2)
177}
178
179/// Compute SPE (Squared Prediction Error) for univariate functional data.
180///
181/// SPE_i = integral (x_centered_i(t) - x_reconstructed_i(t))^2 w(t) dt
182///
183/// Requires `argvals` to be sorted in ascending order with at least 3
184/// points for Simpson's rule integration. Non-uniform spacing is handled
185/// correctly by the quadrature weights.
186///
187/// # Quadrature accuracy
188///
189/// The Simpson's rule quadrature has error O(h^4 * max|f''''|) where h is
190/// the grid spacing, giving excellent accuracy for smooth functional data
191/// (Bersimis et al., 2007, §2.1, pp. 519–522).
192///
193/// # Arguments
194/// * `centered` - Centered functional data (n x m)
195/// * `reconstructed` - Reconstructed data from FPCA (n x m), already centered (mean subtracted)
196/// * `argvals` - Grid points (length m)
197///
198/// # Example
199///
200/// ```
201/// use fdars_core::matrix::FdMatrix;
202/// use fdars_core::spm::stats::spe_univariate;
203/// let centered = FdMatrix::from_column_major(vec![1.0, 0.0, 0.0, 1.0], 2, 2).unwrap();
204/// let reconstructed = FdMatrix::from_column_major(vec![0.0; 4], 2, 2).unwrap();
205/// let argvals = vec![0.0, 1.0];
206/// let spe = spe_univariate(¢ered, &reconstructed, &argvals).unwrap();
207/// assert_eq!(spe.len(), 2);
208/// assert!(spe[0] > 0.0);
209/// ```
210///
211/// # Errors
212///
213/// Returns [`FdarError::InvalidDimension`] if shapes do not match.
214#[must_use = "statistic result should not be discarded"]
215pub fn spe_univariate(
216 centered: &FdMatrix,
217 reconstructed: &FdMatrix,
218 argvals: &[f64],
219) -> Result<Vec<f64>, FdarError> {
220 let (n, m) = centered.shape();
221 if reconstructed.shape() != (n, m) {
222 return Err(FdarError::InvalidDimension {
223 parameter: "reconstructed",
224 expected: format!("{n}x{m}"),
225 actual: format!("{}x{}", reconstructed.nrows(), reconstructed.ncols()),
226 });
227 }
228 if argvals.len() != m {
229 return Err(FdarError::InvalidDimension {
230 parameter: "argvals",
231 expected: format!("{m}"),
232 actual: format!("{}", argvals.len()),
233 });
234 }
235
236 let weights = simpsons_weights(argvals);
237
238 let spe: Vec<f64> = (0..n)
239 .map(|i| {
240 let mut sum = 0.0;
241 for j in 0..m {
242 let diff = centered[(i, j)] - reconstructed[(i, j)];
243 sum += diff * diff * weights[j];
244 }
245 sum
246 })
247 .collect();
248
249 Ok(spe)
250}
251
252/// Compute SPE for multivariate functional data.
253///
254/// Sum of per-variable integrated squared differences, each variable using
255/// its own grid-specific integration weights. Each entry in `argvals_list`
256/// must be sorted in ascending order with at least 3 points for Simpson's
257/// rule integration. Non-uniform spacing is handled correctly by the
258/// quadrature weights.
259///
260/// # Quadrature accuracy
261///
262/// Each variable's contribution uses Simpson's rule with error
263/// O(h^4 * max|f''''|) where h is the per-variable grid spacing.
264///
265/// # Arguments
266/// * `standardized_vars` - Per-variable standardized centered data (each n x m_p)
267/// * `reconstructed_vars` - Per-variable reconstructed data (each n x m_p)
268/// * `argvals_list` - Per-variable grid points
269///
270/// # Errors
271///
272/// Returns [`FdarError::InvalidDimension`] if the number of variables is inconsistent
273/// or any shapes do not match.
274#[must_use = "statistic result should not be discarded"]
275pub fn spe_multivariate(
276 standardized_vars: &[&FdMatrix],
277 reconstructed_vars: &[&FdMatrix],
278 argvals_list: &[&[f64]],
279) -> Result<Vec<f64>, FdarError> {
280 let p = standardized_vars.len();
281 if reconstructed_vars.len() != p || argvals_list.len() != p {
282 return Err(FdarError::InvalidDimension {
283 parameter: "variables",
284 expected: format!("{p} variables for all arguments"),
285 actual: format!(
286 "std={}, recon={}, argvals={}",
287 standardized_vars.len(),
288 reconstructed_vars.len(),
289 argvals_list.len()
290 ),
291 });
292 }
293
294 if p == 0 {
295 return Ok(Vec::new());
296 }
297
298 let n = standardized_vars[0].nrows();
299
300 let mut spe = vec![0.0; n];
301
302 for v in 0..p {
303 let std_v = standardized_vars[v];
304 let rec_v = reconstructed_vars[v];
305 let argvals = argvals_list[v];
306
307 if std_v.nrows() != n || rec_v.nrows() != n {
308 return Err(FdarError::InvalidDimension {
309 parameter: "variables",
310 expected: format!("{n} rows for variable {v}"),
311 actual: format!("std={}, recon={}", std_v.nrows(), rec_v.nrows()),
312 });
313 }
314
315 let m_v = std_v.ncols();
316 if rec_v.ncols() != m_v || argvals.len() != m_v {
317 return Err(FdarError::InvalidDimension {
318 parameter: "argvals_list",
319 expected: format!("{m_v} for variable {v}"),
320 actual: format!(
321 "recon_cols={}, argvals_len={}",
322 rec_v.ncols(),
323 argvals.len()
324 ),
325 });
326 }
327
328 let weights = simpsons_weights(argvals);
329
330 for i in 0..n {
331 let mut var_spe = 0.0;
332 for j in 0..m_v {
333 let diff = std_v[(i, j)] - rec_v[(i, j)];
334 var_spe += diff * diff * weights[j];
335 }
336 spe[i] += var_spe;
337 }
338 }
339
340 Ok(spe)
341}