Skip to main content

fdars_core/spm/
control.rs

1//! Control limits for SPM monitoring statistics.
2//!
3//! Provides upper control limits (UCL) for T-squared and SPE statistics:
4//! - **T-squared**: chi-squared distribution quantile. For finite calibration
5//!   samples of size *n*, T² follows `(n·ncomp/(n−ncomp))·F(ncomp, n−ncomp)`
6//!   rather than `χ²(ncomp)`. The chi-squared limit is the large-sample
7//!   (*n* → ∞) limit.
8//! - **SPE**: moment-matched chi-squared approximation (Box, 1954, Theorem 1,
9//!   pp. 292–295). The derivation matches the first two moments of the SPE
10//!   distribution to a scaled chi-squared: `E[a·χ²(b)] = a·b = mean`,
11//!   `Var[a·χ²(b)] = 2a²·b = var`, giving `a = var/(2·mean)`,
12//!   `b = 2·mean²/var`.
13//!
14//! # Accuracy
15//!
16//! The moment-matching approximation is exact when SPE follows a scaled
17//! chi-squared distribution (holds under Gaussian scores). For non-Gaussian
18//! data, the approximation error is O(κ₄) where κ₄ is the excess kurtosis
19//! of the SPE distribution. Use [`spe_moment_match_diagnostic`] to assess
20//! adequacy.
21//!
22//! # References
23//!
24//! - Box, G.E.P. (1954). Some theorems on quadratic forms applied in the
25//!   study of analysis of variance problems, I. *Annals of Mathematical
26//!   Statistics*, 25(2), 290–302. Theorem 1, pp. 292–295.
27//! - Woodall, W.H. & Ncube, M.M. (1985). Multivariate CUSUM quality-control
28//!   procedures. *Technometrics*, 27(3), 285–292. §2, pp. 286–288.
29
30use super::chi_squared::chi2_quantile;
31use crate::error::FdarError;
32
33/// A control limit for a monitoring statistic.
34#[derive(Debug, Clone, PartialEq)]
35#[non_exhaustive]
36pub struct ControlLimit {
37    /// Upper control limit value.
38    pub ucl: f64,
39    /// Significance level used.
40    pub alpha: f64,
41    /// Human-readable description of how the limit was computed.
42    pub description: String,
43}
44
45/// Compute the T-squared control limit based on the chi-squared distribution.
46///
47/// UCL = chi2_quantile(1 - alpha, ncomp)
48///
49/// Based on the chi-squared approximation to the Hotelling T² distribution.
50/// For small calibration samples (n < 30), this may be anti-conservative;
51/// consider `t2_limit_robust()` with bootstrap method.
52///
53/// # Finite-sample correction
54///
55/// For finite calibration samples of size *n*, T² follows
56/// `(n·ncomp/(n−ncomp))·F(ncomp, n−ncomp)` rather than `χ²(ncomp)`. The
57/// chi-squared limit used here is the large-sample (*n* → ∞) limit. For
58/// *n* < 30, the F-based limit should be preferred (Woodall & Ncube, 1985,
59/// §2, pp. 286–288).
60///
61/// # Arguments
62/// * `ncomp` - Number of principal components (degrees of freedom)
63/// * `alpha` - Significance level (e.g. 0.05)
64///
65/// # Example
66///
67/// ```
68/// use fdars_core::spm::control::t2_control_limit;
69/// let limit = t2_control_limit(3, 0.05).unwrap();
70/// assert!(limit.ucl > 0.0);
71/// assert!((limit.ucl - 7.815).abs() < 0.01); // chi2(0.95, 3)
72/// ```
73///
74/// # Errors
75///
76/// Returns [`FdarError::InvalidParameter`] if `ncomp` is 0 or `alpha` is not in (0, 1).
77#[must_use = "control limit should not be discarded"]
78pub fn t2_control_limit(ncomp: usize, alpha: f64) -> Result<ControlLimit, FdarError> {
79    if ncomp == 0 {
80        return Err(FdarError::InvalidParameter {
81            parameter: "ncomp",
82            message: "ncomp must be at least 1".to_string(),
83        });
84    }
85    if alpha <= 0.0 || alpha >= 1.0 {
86        return Err(FdarError::InvalidParameter {
87            parameter: "alpha",
88            message: format!("alpha must be in (0, 1), got {alpha}"),
89        });
90    }
91
92    let ucl = chi2_quantile(1.0 - alpha, ncomp);
93
94    Ok(ControlLimit {
95        ucl,
96        alpha,
97        description: format!("T2 ~ chi2({ncomp}), alpha={alpha}"),
98    })
99}
100
101/// Compute the SPE control limit using moment-matched chi-squared approximation.
102///
103/// Uses the Box (1954, Theorem 1, pp. 292–295) moment-matching approximation:
104/// SPE ~ a * chi²(b) where `a = var/(2·mean)`, `b = ceil(2·mean²/var)`. This
105/// is accurate when SPE values are approximately chi-squared distributed,
106/// which holds when the number of retained components is moderate (5–20)
107/// and sample size is adequate (n > 30).
108///
109/// Estimates parameters a and b such that SPE ~ a * chi2(b):
110///   a = var(spe) / (2 * mean(spe))
111///   b = 2 * mean(spe)^2 / var(spe)
112///   UCL = a * chi2_quantile(1 - alpha, ceil(b))
113///
114/// # Accuracy
115///
116/// The moment-matching approximation is exact when SPE follows a scaled
117/// chi-squared distribution (holds under Gaussian scores). For non-Gaussian
118/// data, the approximation error is O(κ₄) where κ₄ is the excess kurtosis
119/// of the SPE distribution. Use [`spe_moment_match_diagnostic`] to check.
120///
121/// # Rounding choice
122///
123/// Using `ceil()` rather than `round()` for the degrees-of-freedom parameter
124/// *b* gives a conservative (wider) control limit, ensuring the nominal
125/// false-alarm rate is not exceeded.
126///
127/// # Arguments
128/// * `spe_values` - In-control SPE values from calibration data
129/// * `alpha` - Significance level (e.g. 0.05)
130///
131/// # Example
132///
133/// ```
134/// use fdars_core::spm::control::spe_control_limit;
135/// let spe_values = vec![1.0, 2.0, 1.5, 3.0, 2.5, 1.8, 2.2, 1.2, 2.8, 1.6];
136/// let limit = spe_control_limit(&spe_values, 0.05).unwrap();
137/// assert!(limit.ucl > 0.0);
138/// ```
139///
140/// # Errors
141///
142/// Returns [`FdarError::InvalidDimension`] if `spe_values` has fewer than 2 elements.
143/// Returns [`FdarError::InvalidParameter`] if `alpha` is not in (0, 1).
144/// Returns [`FdarError::ComputationFailed`] if the estimated variance is zero or negative.
145#[must_use = "control limit should not be discarded"]
146pub fn spe_control_limit(spe_values: &[f64], alpha: f64) -> Result<ControlLimit, FdarError> {
147    let n = spe_values.len();
148    if n < 2 {
149        return Err(FdarError::InvalidDimension {
150            parameter: "spe_values",
151            expected: "at least 2 values".to_string(),
152            actual: format!("{n} values"),
153        });
154    }
155    if alpha <= 0.0 || alpha >= 1.0 {
156        return Err(FdarError::InvalidParameter {
157            parameter: "alpha",
158            message: format!("alpha must be in (0, 1), got {alpha}"),
159        });
160    }
161
162    let mean: f64 = spe_values.iter().sum::<f64>() / n as f64;
163    let var: f64 = spe_values.iter().map(|&s| (s - mean).powi(2)).sum::<f64>() / (n as f64 - 1.0);
164
165    if mean <= 0.0 || var <= 0.0 {
166        return Err(FdarError::ComputationFailed {
167            operation: "spe_control_limit",
168            detail: format!(
169                "SPE mean={mean:.6}, var={var:.6}; cannot estimate chi-squared parameters from non-positive moments"
170            ),
171        });
172    }
173
174    let a = var / (2.0 * mean);
175    let b = 2.0 * mean * mean / var;
176    // Using ceil() rather than round() gives a conservative (wider) control
177    // limit, ensuring the nominal false-alarm rate is not exceeded.
178    let b_int = b.ceil().max(1.0) as usize;
179
180    let ucl = a * chi2_quantile(1.0 - alpha, b_int);
181
182    Ok(ControlLimit {
183        ucl,
184        alpha,
185        description: format!("SPE ~ {a:.4} * chi2({b_int}), alpha={alpha}"),
186    })
187}
188
189/// Diagnostic for the SPE moment-match chi-squared approximation.
190///
191/// Computes the excess kurtosis of the SPE values and compares it to the
192/// theoretical kurtosis of the fitted chi-squared distribution (12/b).
193/// A large discrepancy suggests the chi-squared approximation may be poor.
194///
195/// Returns `(excess_kurtosis, theoretical_kurtosis, is_adequate)` where
196/// `is_adequate` is true when the absolute difference is within 50% of
197/// the theoretical value. Interpretation: `is_adequate == true` (ratio
198/// within 50%) indicates the chi-squared model fits well. When inadequate,
199/// the SPE distribution may be far from chi-squared (possibly multi-modal
200/// or heavy-tailed); in that case, use `spe_limit_robust()` with bootstrap
201/// or KDE method instead.
202///
203/// # Arguments
204/// * `spe_values` - In-control SPE values
205///
206/// # Errors
207///
208/// Returns [`FdarError::InvalidDimension`] if fewer than 4 values are provided.
209#[must_use = "diagnostic result should not be discarded"]
210pub fn spe_moment_match_diagnostic(spe_values: &[f64]) -> Result<(f64, f64, bool), FdarError> {
211    let n = spe_values.len();
212    if n < 4 {
213        return Err(FdarError::InvalidDimension {
214            parameter: "spe_values",
215            expected: "at least 4 values".to_string(),
216            actual: format!("{n} values"),
217        });
218    }
219
220    let mean: f64 = spe_values.iter().sum::<f64>() / n as f64;
221    let var: f64 = spe_values.iter().map(|&s| (s - mean).powi(2)).sum::<f64>() / (n as f64 - 1.0);
222
223    if var <= 0.0 {
224        return Err(FdarError::ComputationFailed {
225            operation: "spe_moment_match_diagnostic",
226            detail: "variance is zero".to_string(),
227        });
228    }
229
230    // Excess kurtosis: E[(X-mu)^4]/sigma^4 - 3
231    let m4: f64 = spe_values.iter().map(|&s| (s - mean).powi(4)).sum::<f64>() / n as f64;
232    let excess_kurtosis = m4 / (var * var) - 3.0;
233
234    // Theoretical: for chi2(b) scaled by a, kurtosis = 12/b
235    // b = 2 * mean^2 / var
236    let b = 2.0 * mean * mean / var;
237    let theoretical_kurtosis = if b > 0.0 { 12.0 / b } else { f64::INFINITY };
238
239    // Adequate if |observed - theoretical| <= 0.5 * theoretical
240    let is_adequate = if theoretical_kurtosis.is_finite() {
241        (excess_kurtosis - theoretical_kurtosis).abs() <= 0.5 * theoretical_kurtosis
242    } else {
243        false
244    };
245
246    Ok((excess_kurtosis, theoretical_kurtosis, is_adequate))
247}