Skip to main content

fdars_core/spm/
phase.rs

1//! Phase I/II framework for statistical process monitoring.
2//!
3//! Phase I: Builds a monitoring chart from historical in-control data by
4//! splitting into tuning (for FPCA) and calibration (for control limits) sets.
5//!
6//! Phase II: Monitors new observations against the established chart.
7//!
8//! Both univariate and multivariate variants are provided.
9//!
10//! # References
11//!
12//! - Horváth, L. & Kokoszka, P. (2012). *Inference for Functional Data
13//!   with Applications*, Chapter 13, pp. 323--352. Springer.
14//! - Flores, M., Naya, S., Fernández-Casal, R. & Zaragoza, S. (2022).
15//!   Constructing a control chart using functional data, §2, Algorithm 1.
16//!   *Mathematics*, 8(1), 58.
17
18use crate::error::FdarError;
19use crate::matrix::FdMatrix;
20use crate::regression::{fdata_to_pc_1d, FpcaResult};
21
22use super::control::{spe_control_limit, t2_control_limit, ControlLimit};
23use super::mfpca::{mfpca, MfpcaConfig, MfpcaResult};
24use super::stats::{hotelling_t2, spe_multivariate, spe_univariate};
25
26/// Configuration for SPM chart construction.
27#[derive(Debug, Clone, PartialEq)]
28pub struct SpmConfig {
29    /// Number of principal components to retain (default 5).
30    ///
31    /// Typical range: 3--10. Use [`select_ncomp()`](super::ncomp::select_ncomp)
32    /// for data-driven selection. More components capture finer structure but
33    /// increase dimensionality of the monitoring statistic.
34    pub ncomp: usize,
35    /// Significance level for control limits (default 0.05).
36    pub alpha: f64,
37    /// Fraction of data used for tuning/FPCA (default 0.5).
38    ///
39    /// The remainder forms the calibration set for control limits. Default 0.5
40    /// balances FPCA estimation quality against control limit precision. With
41    /// small datasets (n < 50), consider 0.6--0.7 to ensure adequate FPCA
42    /// estimation.
43    ///
44    /// The tuning/calibration split induces a bias-variance trade-off: a larger
45    /// tuning fraction yields better FPCA eigenfunction estimates but less
46    /// precise control limits. The optimal split depends on the eigenvalue
47    /// decay rate — fast decay (smooth processes) favors allocating more data
48    /// to calibration, while slow decay (rough processes) favors more tuning
49    /// data for stable FPCA estimation.
50    pub tuning_fraction: f64,
51    /// Random seed for data splitting (default 42).
52    pub seed: u64,
53}
54
55impl Default for SpmConfig {
56    fn default() -> Self {
57        Self {
58            ncomp: 5,
59            alpha: 0.05,
60            tuning_fraction: 0.5,
61            seed: 42,
62        }
63    }
64}
65
66/// Univariate SPM chart from Phase I.
67///
68/// The chart assumes approximate multivariate normality in the score space.
69/// For non-Gaussian functional data, the chi-squared control limits are
70/// approximate. Use `t2_limit_robust()` with bootstrap method for
71/// distribution-free limits.
72///
73/// For finite calibration samples, the exact Hotelling T^2 distribution is
74/// `(n_cal * ncomp / (n_cal - ncomp)) * F(ncomp, n_cal - ncomp)`. The
75/// chi-squared limit `chi2(ncomp)` is asymptotically exact as `n_cal -> inf`,
76/// but can be anti-conservative for small calibration sets (n_cal < 10 *
77/// ncomp). When the calibration set is small, prefer bootstrap-based limits.
78#[derive(Debug, Clone, PartialEq)]
79#[non_exhaustive]
80pub struct SpmChart {
81    /// FPCA result from the tuning set.
82    pub fpca: FpcaResult,
83    /// Eigenvalues lambda_l = s_l^2 / (n_tune - 1), where s_l are singular
84    /// values from the SVD of the centered data matrix X = U Sigma V^T. This
85    /// gives the sample covariance eigenvalues since Cov = X^T X / (n-1) has
86    /// eigenvalues s_l^2 / (n-1).
87    ///
88    /// The actual number of components may be fewer than `config.ncomp` if
89    /// limited by sample size or grid resolution.
90    pub eigenvalues: Vec<f64>,
91    /// T-squared values for the calibration set.
92    pub t2_phase1: Vec<f64>,
93    /// SPE values for the calibration set.
94    pub spe_phase1: Vec<f64>,
95    /// T-squared control limit.
96    pub t2_limit: ControlLimit,
97    /// SPE control limit.
98    pub spe_limit: ControlLimit,
99    /// Configuration used to build the chart.
100    pub config: SpmConfig,
101    /// Whether the sample size meets the recommended minimum (10 × ncomp).
102    /// False indicates results may be unstable.
103    pub sample_size_adequate: bool,
104}
105
106/// Multivariate SPM chart from Phase I.
107#[derive(Debug, Clone, PartialEq)]
108#[non_exhaustive]
109pub struct MfSpmChart {
110    /// MFPCA result from the tuning set.
111    pub mfpca: MfpcaResult,
112    /// T-squared values for the calibration set.
113    pub t2_phase1: Vec<f64>,
114    /// SPE values for the calibration set.
115    pub spe_phase1: Vec<f64>,
116    /// T-squared control limit.
117    pub t2_limit: ControlLimit,
118    /// SPE control limit.
119    pub spe_limit: ControlLimit,
120    /// Configuration used to build the chart.
121    pub config: SpmConfig,
122}
123
124/// Result of Phase II monitoring.
125#[derive(Debug, Clone, PartialEq)]
126#[non_exhaustive]
127pub struct SpmMonitorResult {
128    /// T-squared values for new observations.
129    pub t2: Vec<f64>,
130    /// SPE values for new observations.
131    pub spe: Vec<f64>,
132    /// T-squared alarm flags.
133    pub t2_alarm: Vec<bool>,
134    /// SPE alarm flags.
135    pub spe_alarm: Vec<bool>,
136    /// Score matrix for new observations.
137    pub scores: FdMatrix,
138}
139
140/// Split indices into tuning and calibration sets.
141///
142/// Uses a deterministic Fisher-Yates shuffle with PCG-XSH-RR output function
143/// (O'Neill, 2014, §4.1, p. 14) for high-quality uniform sampling. PCG-XSH-RR
144/// has period 2^64 and passes the full TestU01 BigCrush battery. The same seed
145/// always produces the same split, ensuring reproducibility.
146pub(super) fn split_indices(n: usize, tuning_fraction: f64, seed: u64) -> (Vec<usize>, Vec<usize>) {
147    let n_tune = ((n as f64 * tuning_fraction).round() as usize)
148        .max(2)
149        .min(n - 1);
150
151    // Generate a deterministic permutation using PCG-XSH-RR
152    let mut indices: Vec<usize> = (0..n).collect();
153    let mut rng_state: u64 = seed;
154    for i in (1..n).rev() {
155        let j = pcg_next(&mut rng_state) as usize % (i + 1);
156        indices.swap(i, j);
157    }
158
159    let tune_indices: Vec<usize> = indices[..n_tune].to_vec();
160    let cal_indices: Vec<usize> = indices[n_tune..].to_vec();
161    (tune_indices, cal_indices)
162}
163
164/// PCG-XSH-RR 64→32 output function (O'Neill, 2014).
165///
166/// PCG-XSH-RR (O'Neill, 2014) produces uniformly distributed 32-bit outputs
167/// from a 64-bit LCG state. The XSH-RR output function (xor-shift, random
168/// rotation) provides excellent statistical quality (passes TestU01 BigCrush).
169/// Used here for Fisher-Yates shuffle in `split_indices`.
170fn pcg_next(state: &mut u64) -> u32 {
171    let old = *state;
172    *state = old.wrapping_mul(6_364_136_223_846_793_005).wrapping_add(1);
173    let xorshifted = (((old >> 18) ^ old) >> 27) as u32;
174    let rot = (old >> 59) as u32;
175    xorshifted.rotate_right(rot)
176}
177
178/// Compute centered reconstruction (without adding back mean) for SPE.
179///
180/// Returns the centered reconstruction: scores * rotation^T (no mean added).
181pub(super) fn centered_reconstruct(fpca: &FpcaResult, scores: &FdMatrix, ncomp: usize) -> FdMatrix {
182    let n = scores.nrows();
183    let m = fpca.mean.len();
184    let ncomp = ncomp.min(fpca.rotation.ncols()).min(scores.ncols());
185
186    let mut recon = FdMatrix::zeros(n, m);
187    for i in 0..n {
188        for j in 0..m {
189            let mut val = 0.0;
190            for k in 0..ncomp {
191                val += scores[(i, k)] * fpca.rotation[(j, k)];
192            }
193            recon[(i, j)] = val;
194        }
195    }
196    recon
197}
198
199/// Compute centered data for new observations (data - mean).
200pub(super) fn center_data(data: &FdMatrix, mean: &[f64]) -> FdMatrix {
201    let (n, m) = data.shape();
202    let mut centered = FdMatrix::zeros(n, m);
203    for i in 0..n {
204        for j in 0..m {
205            centered[(i, j)] = data[(i, j)] - mean[j];
206        }
207    }
208    centered
209}
210
211/// Build a univariate SPM chart from Phase I data.
212///
213/// # Arguments
214/// * `data` - In-control functional data (n x m)
215/// * `argvals` - Grid points (length m)
216/// * `config` - SPM configuration
217///
218/// # Errors
219///
220/// Returns errors from FPCA, T-squared computation, or control limit estimation.
221///
222/// # Assumptions
223///
224/// - The in-control data should be approximately normally distributed in the
225///   score space. Departures from normality may inflate false alarm rates.
226/// - Recommended minimum sample size: at least 10 × ncomp observations to
227///   ensure stable FPCA estimation (Horváth & Kokoszka, 2012).
228/// - The tuning/calibration split means the effective sample for each step
229///   is smaller than `n`. Ensure each subset has at least 2 × ncomp rows.
230///
231/// # Example
232/// ```
233/// use fdars_core::matrix::FdMatrix;
234/// use fdars_core::spm::phase::{spm_phase1, SpmConfig};
235/// let data = FdMatrix::from_column_major(
236///     (0..200).map(|i| (i as f64 * 0.1).sin()).collect(), 20, 10
237/// ).unwrap();
238/// let argvals: Vec<f64> = (0..10).map(|i| i as f64 / 9.0).collect();
239/// let config = SpmConfig { ncomp: 2, ..SpmConfig::default() };
240/// let chart = spm_phase1(&data, &argvals, &config).unwrap();
241/// assert!(chart.eigenvalues.len() <= 2);
242/// assert!(chart.t2_limit.ucl > 0.0);
243/// ```
244#[must_use = "expensive computation whose result should not be discarded"]
245pub fn spm_phase1(
246    data: &FdMatrix,
247    argvals: &[f64],
248    config: &SpmConfig,
249) -> Result<SpmChart, FdarError> {
250    let (n, m) = data.shape();
251    if n < 4 {
252        return Err(FdarError::InvalidDimension {
253            parameter: "data",
254            expected: "at least 4 observations for tuning/calibration split".to_string(),
255            actual: format!("{n} observations"),
256        });
257    }
258    let sample_size_adequate = n >= 10 * config.ncomp;
259    if argvals.len() != m {
260        return Err(FdarError::InvalidDimension {
261            parameter: "argvals",
262            expected: format!("{m}"),
263            actual: format!("{}", argvals.len()),
264        });
265    }
266
267    // Split into tuning and calibration
268    let (tune_idx, cal_idx) = split_indices(n, config.tuning_fraction, config.seed);
269
270    let tune_data = crate::cv::subset_rows(data, &tune_idx);
271    let cal_data = crate::cv::subset_rows(data, &cal_idx);
272    let n_tune = tune_data.nrows();
273    if n_tune < 3 {
274        return Err(FdarError::InvalidDimension {
275            parameter: "data",
276            expected: "tuning set with at least 3 observations".to_string(),
277            actual: format!(
278                "{n_tune} observations in tuning set (increase data size or tuning_fraction)"
279            ),
280        });
281    }
282
283    // FPCA on tuning set.
284    // Clamp ncomp to at most (n_tune - 1) and m to avoid rank-deficient SVD.
285    // The actual number of retained components may therefore be fewer than
286    // config.ncomp; this is reflected in the chart's eigenvalues length.
287    let ncomp = config.ncomp.min(n_tune - 1).min(m);
288    let fpca = fdata_to_pc_1d(&tune_data, ncomp)?;
289    let actual_ncomp = fpca.scores.ncols();
290
291    // Eigenvalues are computed as λ_l = s_l² / (n-1) where s_l is the l-th
292    // singular value from SVD of the centered data. This gives the sample
293    // variance explained by each PC, consistent with the covariance-based PCA
294    // formulation (cov = X'X / (n-1), whose eigenvalues are s_l² / (n-1)).
295    let eigenvalues: Vec<f64> = fpca
296        .singular_values
297        .iter()
298        .take(actual_ncomp)
299        .map(|&sv| sv * sv / (n_tune as f64 - 1.0))
300        .collect();
301
302    // Project calibration set
303    let cal_scores = fpca.project(&cal_data)?;
304
305    // T-squared on calibration
306    let t2_phase1 = hotelling_t2(&cal_scores, &eigenvalues)?;
307
308    // SPE on calibration: need centered and reconstructed
309    let cal_centered = center_data(&cal_data, &fpca.mean);
310    let cal_recon_centered = centered_reconstruct(&fpca, &cal_scores, actual_ncomp);
311    let spe_phase1 = spe_univariate(&cal_centered, &cal_recon_centered, argvals)?;
312
313    // Control limits
314    let t2_limit = t2_control_limit(actual_ncomp, config.alpha)?;
315    let spe_limit = spe_control_limit(&spe_phase1, config.alpha)?;
316
317    Ok(SpmChart {
318        fpca,
319        eigenvalues,
320        t2_phase1,
321        spe_phase1,
322        t2_limit,
323        spe_limit,
324        config: config.clone(),
325        sample_size_adequate,
326    })
327}
328
329/// Monitor new univariate functional data against an established SPM chart.
330///
331/// # Arguments
332/// * `chart` - Phase I SPM chart
333/// * `new_data` - New functional observations (n_new x m)
334/// * `argvals` - Grid points (length m)
335///
336/// # Errors
337///
338/// Returns errors from projection or statistic computation.
339///
340/// # Example
341/// ```
342/// use fdars_core::matrix::FdMatrix;
343/// use fdars_core::spm::phase::{spm_phase1, spm_monitor, SpmConfig};
344/// let data = FdMatrix::from_column_major(
345///     (0..200).map(|i| (i as f64 * 0.1).sin()).collect(), 20, 10
346/// ).unwrap();
347/// let argvals: Vec<f64> = (0..10).map(|i| i as f64 / 9.0).collect();
348/// let config = SpmConfig { ncomp: 2, ..SpmConfig::default() };
349/// let chart = spm_phase1(&data, &argvals, &config).unwrap();
350/// let new_data = FdMatrix::from_column_major(
351///     (0..50).map(|i| (i as f64 * 0.1).sin()).collect(), 5, 10
352/// ).unwrap();
353/// let result = spm_monitor(&chart, &new_data, &argvals).unwrap();
354/// assert_eq!(result.t2.len(), 5);
355/// ```
356#[must_use = "monitoring result should not be discarded"]
357pub fn spm_monitor(
358    chart: &SpmChart,
359    new_data: &FdMatrix,
360    argvals: &[f64],
361) -> Result<SpmMonitorResult, FdarError> {
362    let m = chart.fpca.mean.len();
363    if new_data.ncols() != m {
364        return Err(FdarError::InvalidDimension {
365            parameter: "new_data",
366            expected: format!("{m} columns"),
367            actual: format!("{} columns", new_data.ncols()),
368        });
369    }
370
371    let ncomp = chart.eigenvalues.len();
372
373    // Project new data
374    let scores = chart.fpca.project(new_data)?;
375
376    // T-squared
377    let t2 = hotelling_t2(&scores, &chart.eigenvalues)?;
378
379    // SPE
380    let centered = center_data(new_data, &chart.fpca.mean);
381    let recon_centered = centered_reconstruct(&chart.fpca, &scores, ncomp);
382    let spe = spe_univariate(&centered, &recon_centered, argvals)?;
383
384    // Alarms
385    let t2_alarm: Vec<bool> = t2.iter().map(|&v| v > chart.t2_limit.ucl).collect();
386    let spe_alarm: Vec<bool> = spe.iter().map(|&v| v > chart.spe_limit.ucl).collect();
387
388    Ok(SpmMonitorResult {
389        t2,
390        spe,
391        t2_alarm,
392        spe_alarm,
393        scores,
394    })
395}
396
397/// Build a multivariate SPM chart from Phase I data.
398///
399/// # Arguments
400/// * `variables` - Slice of in-control functional matrices (each n x m_p)
401/// * `argvals_list` - Per-variable grid points
402/// * `config` - SPM configuration
403///
404/// # Errors
405///
406/// Returns errors from MFPCA, T-squared computation, or control limit estimation.
407#[must_use = "expensive computation whose result should not be discarded"]
408pub fn mf_spm_phase1(
409    variables: &[&FdMatrix],
410    argvals_list: &[&[f64]],
411    config: &SpmConfig,
412) -> Result<MfSpmChart, FdarError> {
413    if variables.is_empty() {
414        return Err(FdarError::InvalidDimension {
415            parameter: "variables",
416            expected: "at least 1 variable".to_string(),
417            actual: "0 variables".to_string(),
418        });
419    }
420    if variables.len() != argvals_list.len() {
421        return Err(FdarError::InvalidDimension {
422            parameter: "argvals_list",
423            expected: format!("{} (matching variables)", variables.len()),
424            actual: format!("{}", argvals_list.len()),
425        });
426    }
427
428    let n = variables[0].nrows();
429    if n < 4 {
430        return Err(FdarError::InvalidDimension {
431            parameter: "variables",
432            expected: "at least 4 observations".to_string(),
433            actual: format!("{n} observations"),
434        });
435    }
436
437    // Validate argvals lengths
438    for (p, (var, argvals)) in variables.iter().zip(argvals_list.iter()).enumerate() {
439        if var.ncols() != argvals.len() {
440            return Err(FdarError::InvalidDimension {
441                parameter: "argvals_list",
442                expected: format!("{} for variable {p}", var.ncols()),
443                actual: format!("{}", argvals.len()),
444            });
445        }
446    }
447
448    // Split
449    let (tune_idx, cal_idx) = split_indices(n, config.tuning_fraction, config.seed);
450
451    let tune_vars: Vec<FdMatrix> = variables
452        .iter()
453        .map(|v| crate::cv::subset_rows(v, &tune_idx))
454        .collect();
455    let cal_vars: Vec<FdMatrix> = variables
456        .iter()
457        .map(|v| crate::cv::subset_rows(v, &cal_idx))
458        .collect();
459
460    let tune_refs: Vec<&FdMatrix> = tune_vars.iter().collect();
461
462    // MFPCA on tuning set
463    let mfpca_config = MfpcaConfig {
464        ncomp: config.ncomp,
465        weighted: true,
466    };
467    let mfpca_result = mfpca(&tune_refs, &mfpca_config)?;
468    let actual_ncomp = mfpca_result.eigenvalues.len();
469
470    // Project calibration set
471    let cal_refs: Vec<&FdMatrix> = cal_vars.iter().collect();
472    let cal_scores = mfpca_result.project(&cal_refs)?;
473
474    // T-squared
475    let t2_phase1 = hotelling_t2(&cal_scores, &mfpca_result.eigenvalues)?;
476
477    // SPE on calibration: need standardized centered and reconstructed
478    let cal_recon = mfpca_result.reconstruct(&cal_scores, actual_ncomp)?;
479
480    // For SPE, we need standardized centered data and standardized reconstruction
481    let n_cal = cal_vars[0].nrows();
482    let mut std_vars: Vec<FdMatrix> = Vec::with_capacity(variables.len());
483    let mut std_recon: Vec<FdMatrix> = Vec::with_capacity(variables.len());
484
485    for (p, cal_var) in cal_vars.iter().enumerate() {
486        let m_p = cal_var.ncols();
487        let scale = if mfpca_result.scales[p] > 1e-15 {
488            mfpca_result.scales[p]
489        } else {
490            1.0
491        };
492
493        let mut std_mat = FdMatrix::zeros(n_cal, m_p);
494        let mut recon_mat = FdMatrix::zeros(n_cal, m_p);
495        for i in 0..n_cal {
496            for j in 0..m_p {
497                std_mat[(i, j)] = (cal_var[(i, j)] - mfpca_result.means[p][j]) / scale;
498                recon_mat[(i, j)] = (cal_recon[p][(i, j)] - mfpca_result.means[p][j]) / scale;
499            }
500        }
501        std_vars.push(std_mat);
502        std_recon.push(recon_mat);
503    }
504
505    let std_refs: Vec<&FdMatrix> = std_vars.iter().collect();
506    let recon_refs: Vec<&FdMatrix> = std_recon.iter().collect();
507    let spe_phase1 = spe_multivariate(&std_refs, &recon_refs, argvals_list)?;
508
509    // Control limits
510    let t2_limit = t2_control_limit(actual_ncomp, config.alpha)?;
511    let spe_limit = spe_control_limit(&spe_phase1, config.alpha)?;
512
513    Ok(MfSpmChart {
514        mfpca: mfpca_result,
515        t2_phase1,
516        spe_phase1,
517        t2_limit,
518        spe_limit,
519        config: config.clone(),
520    })
521}
522
523/// Monitor new multivariate functional data against an established chart.
524///
525/// # Arguments
526/// * `chart` - Phase I multivariate SPM chart
527/// * `new_variables` - Per-variable new data (each n_new x m_p)
528/// * `argvals_list` - Per-variable grid points
529///
530/// # Errors
531///
532/// Returns errors from projection or statistic computation.
533#[must_use = "monitoring result should not be discarded"]
534pub fn mf_spm_monitor(
535    chart: &MfSpmChart,
536    new_variables: &[&FdMatrix],
537    argvals_list: &[&[f64]],
538) -> Result<SpmMonitorResult, FdarError> {
539    let n_vars = chart.mfpca.means.len();
540    if new_variables.len() != n_vars {
541        return Err(FdarError::InvalidDimension {
542            parameter: "new_variables",
543            expected: format!("{n_vars} variables"),
544            actual: format!("{} variables", new_variables.len()),
545        });
546    }
547
548    let actual_ncomp = chart.mfpca.eigenvalues.len();
549
550    // Project
551    let scores = chart.mfpca.project(new_variables)?;
552
553    // T-squared
554    let t2 = hotelling_t2(&scores, &chart.mfpca.eigenvalues)?;
555
556    // SPE
557    let recon = chart.mfpca.reconstruct(&scores, actual_ncomp)?;
558
559    let n_new = new_variables[0].nrows();
560    let mut std_vars: Vec<FdMatrix> = Vec::with_capacity(n_vars);
561    let mut std_recon: Vec<FdMatrix> = Vec::with_capacity(n_vars);
562
563    for (p, new_var) in new_variables.iter().enumerate() {
564        let m_p = new_var.ncols();
565        let scale = if chart.mfpca.scales[p] > 1e-15 {
566            chart.mfpca.scales[p]
567        } else {
568            1.0
569        };
570
571        let mut std_mat = FdMatrix::zeros(n_new, m_p);
572        let mut recon_mat = FdMatrix::zeros(n_new, m_p);
573        for i in 0..n_new {
574            for j in 0..m_p {
575                std_mat[(i, j)] = (new_var[(i, j)] - chart.mfpca.means[p][j]) / scale;
576                recon_mat[(i, j)] = (recon[p][(i, j)] - chart.mfpca.means[p][j]) / scale;
577            }
578        }
579        std_vars.push(std_mat);
580        std_recon.push(recon_mat);
581    }
582
583    let std_refs: Vec<&FdMatrix> = std_vars.iter().collect();
584    let recon_refs: Vec<&FdMatrix> = std_recon.iter().collect();
585    let spe = spe_multivariate(&std_refs, &recon_refs, argvals_list)?;
586
587    // Alarms
588    let t2_alarm: Vec<bool> = t2.iter().map(|&v| v > chart.t2_limit.ucl).collect();
589    let spe_alarm: Vec<bool> = spe.iter().map(|&v| v > chart.spe_limit.ucl).collect();
590
591    Ok(SpmMonitorResult {
592        t2,
593        spe,
594        t2_alarm,
595        spe_alarm,
596        scores,
597    })
598}