Skip to main content

fdars_core/spm/
frcc.rs

1//! Functional Regression Control Chart (FRCC).
2//!
3//! Monitors a functional response after adjusting for known scalar covariates
4//! using function-on-scalar regression (FOSR). The residuals are then monitored
5//! via FPCA-based T-squared and SPE statistics.
6//!
7//! This is useful when the process output (functional) depends on known inputs
8//! (scalar predictors), and we want to detect deviations beyond what the inputs
9//! explain.
10//!
11//! # Model assessment
12//!
13//! R-squared = 1 - SSR/SST measures the proportion of response variance explained
14//! by the FRCC model (computed pointwise across grid points and summed). For
15//! monitoring, R-squared > 0.7 indicates excellent model fit; R-squared 0.5--0.7
16//! is adequate; R-squared 0.3--0.5 is marginal (covariate adjustment helps but
17//! residual variation is large); R-squared < 0.3 suggests the functional
18//! predictors have weak predictive power and monitoring may be ineffective
19//! compared to standard `spm_phase1`.
20//!
21//! After building the FRCC chart, verify `fosr_r_squared` to assess model
22//! quality. R-squared values: > 0.5 (strong adjustment), 0.3--0.5 (moderate),
23//! 0.1--0.3 (weak), < 0.1 (rejected by default threshold).
24//!
25//! # Residual assumptions
26//!
27//! The monitoring assumes regression residuals are independent across observations.
28//! Autocorrelated residuals (e.g., from time-series data or batch-to-batch effects)
29//! inflate SPE alarm rates because the empirical SPE distribution underestimates the
30//! true variability. Check residual autocorrelation using the lag-1 sample
31//! autocorrelation of the SPE sequence and consider pre-whitening (e.g., fitting an
32//! AR(1) model to the residual scores) if significant.
33//!
34//! The SPE control limit assumes residuals are approximately independent across
35//! observations. If the residuals exhibit temporal autocorrelation, consider using
36//! bootstrap control limits via `spe_limit_robust()`.
37//!
38//! # References
39//!
40//! - Capezza, C., Lepore, A., Menafoglio, A., Palumbo, B. & Vantini, S.
41//!   (2020). Control charts for monitoring ship operating conditions and
42//!   CO2 emissions based on scalar-on-function regression. *Applied
43//!   Stochastic Models in Business and Industry*, 36(3), 477--500,
44//!   section 3.1 (FRCC construction), section 4 (monitoring procedure).
45
46use crate::error::FdarError;
47use crate::function_on_scalar::{fosr, predict_fosr, FosrResult};
48use crate::matrix::FdMatrix;
49use crate::regression::{fdata_to_pc_1d, FpcaResult};
50
51use super::control::{spe_control_limit, t2_control_limit, ControlLimit};
52use super::phase::{center_data, centered_reconstruct, split_indices};
53use super::stats::{hotelling_t2, spe_univariate};
54
55/// Configuration for FRCC chart construction.
56#[derive(Debug, Clone, PartialEq)]
57pub struct FrccConfig {
58    /// Number of principal components for residual FPCA (default 5).
59    pub ncomp: usize,
60    /// FOSR smoothing parameter; controls roughness penalty on β(t) (default 1e-4).
61    /// Larger values produce smoother coefficient functions. Typical range: [1e-6, 1e-2].
62    /// Use cross-validation if unsure.
63    pub fosr_lambda: f64,
64    /// Significance level (default 0.05).
65    pub alpha: f64,
66    /// Fraction of data for tuning (default 0.5).
67    ///
68    /// Must be in (0, 1). The tuning set is used for FOSR fitting and R-squared
69    /// assessment; the calibration set (1 - tuning_fraction) is used for FPCA
70    /// and control limit estimation. Larger tuning fractions give more stable
71    /// FOSR estimates but fewer calibration observations for control limits.
72    /// For n < 50, consider tuning_fraction = 0.4 to ensure adequate calibration.
73    /// For n > 200, tuning_fraction = 0.6 may improve FOSR stability.
74    pub tuning_fraction: f64,
75    /// Random seed (default 42).
76    pub seed: u64,
77    /// Minimum FOSR R² required to proceed (default 0.1).
78    /// If the FOSR model explains less than this fraction of variance,
79    /// frcc_phase1 returns an error suggesting standard SPM instead.
80    ///
81    /// Default 0.1 is a lenient threshold that catches only clearly useless
82    /// models. For production use, consider 0.2--0.3. An R² of 0.3 means
83    /// predictors explain 30% of functional variance --- enough for
84    /// meaningful covariate adjustment.
85    pub min_r_squared: f64,
86}
87
88impl Default for FrccConfig {
89    fn default() -> Self {
90        Self {
91            ncomp: 5,
92            fosr_lambda: 1e-4,
93            alpha: 0.05,
94            tuning_fraction: 0.5,
95            seed: 42,
96            min_r_squared: 0.1,
97        }
98    }
99}
100
101/// Phase I FRCC chart.
102#[derive(Debug, Clone, PartialEq)]
103#[non_exhaustive]
104pub struct FrccChart {
105    /// Fitted FOSR model.
106    pub fosr: FosrResult,
107    /// FPCA on calibration residuals.
108    pub residual_fpca: FpcaResult,
109    /// Eigenvalues from residual FPCA.
110    pub eigenvalues: Vec<f64>,
111    /// T-squared control limit.
112    pub t2_limit: ControlLimit,
113    /// SPE control limit.
114    pub spe_limit: ControlLimit,
115    /// Coefficient of determination (R²) for the FOSR model on the tuning set.
116    /// Values near 0 suggest the predictors explain little variance, and
117    /// the FRCC may not add value over a standard SPM chart.
118    pub fosr_r_squared: f64,
119    /// Configuration used.
120    pub config: FrccConfig,
121}
122
123/// Result of FRCC monitoring.
124#[derive(Debug, Clone, PartialEq)]
125#[non_exhaustive]
126pub struct FrccMonitorResult {
127    /// T-squared values.
128    pub t2: Vec<f64>,
129    /// SPE values.
130    pub spe: Vec<f64>,
131    /// T-squared alarm flags.
132    pub t2_alarm: Vec<bool>,
133    /// SPE alarm flags.
134    pub spe_alarm: Vec<bool>,
135    /// Residual scores.
136    pub residual_scores: FdMatrix,
137}
138
139/// Compute residuals: observed - predicted.
140fn compute_residuals(observed: &FdMatrix, predicted: &FdMatrix) -> FdMatrix {
141    let (n, m) = observed.shape();
142    let mut residuals = FdMatrix::zeros(n, m);
143    for i in 0..n {
144        for j in 0..m {
145            residuals[(i, j)] = observed[(i, j)] - predicted[(i, j)];
146        }
147    }
148    residuals
149}
150
151/// Build a Functional Regression Control Chart from Phase I data.
152///
153/// 1. Splits data into tuning and calibration sets
154/// 2. Fits FOSR on tuning set
155/// 3. Computes residuals on calibration set
156/// 4. Runs FPCA on calibration residuals
157/// 5. Computes T-squared and SPE control limits
158///
159/// The R² is computed on the tuning set, not the calibration set, to avoid
160/// optimistic bias. The tuning set is used for both FOSR fitting and R²
161/// assessment.
162///
163/// The SPE control limit assumes approximately independent residuals. For
164/// processes with temporal structure in the residuals, use `spe_limit_robust()`
165/// with bootstrap method.
166///
167/// # Arguments
168/// * `y_curves` - Functional response (n x m)
169/// * `predictors` - Scalar predictors (n x p)
170/// * `argvals` - Grid points (length m)
171/// * `config` - FRCC configuration
172///
173/// # Errors
174///
175/// Returns errors from FOSR fitting, FPCA, or control limit estimation.
176///
177/// # Example
178/// ```no_run
179/// use fdars_core::matrix::FdMatrix;
180/// use fdars_core::spm::frcc::{frcc_phase1, FrccConfig};
181/// let n = 60;
182/// let m = 10;
183/// let y = FdMatrix::from_column_major(
184///     (0..n*m).map(|i| (i as f64 * 0.1).sin()).collect(), n, m
185/// ).unwrap();
186/// let pred = FdMatrix::from_column_major(
187///     (0..n).map(|i| i as f64 / n as f64).collect(), n, 1
188/// ).unwrap();
189/// let argvals: Vec<f64> = (0..m).map(|i| i as f64 / (m-1) as f64).collect();
190/// let config = FrccConfig { min_r_squared: 0.0, ..FrccConfig::default() };
191/// let chart = frcc_phase1(&y, &pred, &argvals, &config).unwrap();
192/// assert!(chart.fosr_r_squared >= 0.0);
193/// ```
194#[must_use = "expensive computation whose result should not be discarded"]
195pub fn frcc_phase1(
196    y_curves: &FdMatrix,
197    predictors: &FdMatrix,
198    argvals: &[f64],
199    config: &FrccConfig,
200) -> Result<FrccChart, FdarError> {
201    let (n, m) = y_curves.shape();
202    let p = predictors.ncols();
203
204    if n < 6 {
205        return Err(FdarError::InvalidDimension {
206            parameter: "y_curves",
207            expected: "at least 6 observations".to_string(),
208            actual: format!("{n} observations"),
209        });
210    }
211    if predictors.nrows() != n {
212        return Err(FdarError::InvalidDimension {
213            parameter: "predictors",
214            expected: format!("{n} rows"),
215            actual: format!("{} rows", predictors.nrows()),
216        });
217    }
218    if argvals.len() != m {
219        return Err(FdarError::InvalidDimension {
220            parameter: "argvals",
221            expected: format!("{m}"),
222            actual: format!("{}", argvals.len()),
223        });
224    }
225    if config.tuning_fraction <= 0.0 || config.tuning_fraction >= 1.0 {
226        return Err(FdarError::InvalidParameter {
227            parameter: "tuning_fraction",
228            message: format!(
229                "tuning_fraction must be in (0, 1), got {}",
230                config.tuning_fraction
231            ),
232        });
233    }
234
235    // Split
236    let (tune_idx, cal_idx) = split_indices(n, config.tuning_fraction, config.seed);
237
238    let tune_y = crate::cv::subset_rows(y_curves, &tune_idx);
239    let tune_x = crate::cv::subset_rows(predictors, &tune_idx);
240    let cal_y = crate::cv::subset_rows(y_curves, &cal_idx);
241    let cal_x = crate::cv::subset_rows(predictors, &cal_idx);
242
243    let n_tune = tune_y.nrows();
244    let n_cal = cal_y.nrows();
245
246    if n_cal < 2 {
247        return Err(FdarError::InvalidDimension {
248            parameter: "y_curves",
249            expected: "calibration set with at least 2 observations".to_string(),
250            actual: format!("{n_cal} observations in calibration set"),
251        });
252    }
253
254    // Ensure enough observations for FOSR (needs n >= p + 2)
255    if n_tune < p + 2 {
256        return Err(FdarError::InvalidDimension {
257            parameter: "y_curves",
258            expected: format!("tuning set with at least {} observations", p + 2),
259            actual: format!("{n_tune} observations in tuning set"),
260        });
261    }
262
263    // FOSR on tuning set
264    if config.fosr_lambda < 0.0 {
265        return Err(FdarError::InvalidParameter {
266            parameter: "fosr_lambda",
267            message: format!(
268                "fosr_lambda must be non-negative, got {}",
269                config.fosr_lambda
270            ),
271        });
272    }
273    let fosr_lambda = config.fosr_lambda;
274    let fosr_result = fosr(&tune_y, &tune_x, fosr_lambda)?;
275
276    // Compute R² on tuning set.
277    // R² is computed pointwise across all grid points, implicitly assuming a
278    // uniform grid. For non-uniform grids, this gives equal weight to each
279    // discrete point rather than integrating over the domain. For grids with
280    // > 3x variation in spacing, consider computing a weighted R² using
281    // Simpson's weights on argvals. The pointwise R² here is adequate for
282    // uniform or near-uniform grids.
283    //
284    // The pointwise R² treats each grid point equally, which is appropriate
285    // for uniform or near-uniform grids. For strongly non-uniform grids, the
286    // functional R² (integrating with quadrature weights) would be more
287    // appropriate but is not currently implemented. In practice, the
288    // difference is small when the grid has < 3x variation in spacing.
289    let tune_predicted = predict_fosr(&fosr_result, &tune_x);
290    let fosr_r_squared = {
291        let (n_t, m_t) = tune_y.shape();
292        let mut ss_res = 0.0;
293        let mut ss_tot = 0.0;
294        // Per-point mean for total SS
295        for j in 0..m_t {
296            let col_mean: f64 = (0..n_t).map(|i| tune_y[(i, j)]).sum::<f64>() / n_t as f64;
297            for i in 0..n_t {
298                ss_res += (tune_y[(i, j)] - tune_predicted[(i, j)]).powi(2);
299                ss_tot += (tune_y[(i, j)] - col_mean).powi(2);
300            }
301        }
302        if ss_tot > 0.0 {
303            1.0 - ss_res / ss_tot
304        } else {
305            0.0
306        }
307    };
308
309    if fosr_r_squared < config.min_r_squared {
310        return Err(FdarError::ComputationFailed {
311            operation: "frcc_phase1",
312            detail: format!(
313                "FOSR R² = {fosr_r_squared:.4}; below threshold {:.4}. \
314                 Consider: (a) adding more predictors, (b) increasing the training \
315                 set size, (c) reducing fosr_lambda for a less smooth fit, or \
316                 (d) using standard `spm_phase1` instead. Low R² means the \
317                 predictors explain little variance, so covariate adjustment \
318                 provides minimal benefit and may introduce estimation noise.",
319                config.min_r_squared
320            ),
321        });
322    }
323
324    // Predict on calibration set and compute residuals
325    let cal_predicted = predict_fosr(&fosr_result, &cal_x);
326    let cal_residuals = compute_residuals(&cal_y, &cal_predicted);
327
328    // FPCA on calibration residuals
329    let ncomp = config.ncomp.min(n_cal - 1).min(m);
330    let residual_fpca = fdata_to_pc_1d(&cal_residuals, ncomp)?;
331    let actual_ncomp = residual_fpca.scores.ncols();
332
333    // Eigenvalues
334    let eigenvalues: Vec<f64> = residual_fpca
335        .singular_values
336        .iter()
337        .take(actual_ncomp)
338        .map(|&sv| sv * sv / (n_cal as f64 - 1.0))
339        .collect();
340
341    // T² on calibration scores: computed to verify FPCA but not used for
342    // control limits (which use chi² quantiles instead of empirical limits).
343    let _t2_cal = hotelling_t2(&residual_fpca.scores, &eigenvalues)?;
344
345    // SPE on calibration residuals
346    let cal_resid_centered = center_data(&cal_residuals, &residual_fpca.mean);
347    let cal_resid_recon = centered_reconstruct(&residual_fpca, &residual_fpca.scores, actual_ncomp);
348    let spe_cal = spe_univariate(&cal_resid_centered, &cal_resid_recon, argvals)?;
349
350    // Control limits
351    let t2_limit = t2_control_limit(actual_ncomp, config.alpha)?;
352    let spe_limit = spe_control_limit(&spe_cal, config.alpha)?;
353
354    Ok(FrccChart {
355        fosr: fosr_result,
356        residual_fpca,
357        eigenvalues,
358        t2_limit,
359        spe_limit,
360        fosr_r_squared,
361        config: config.clone(),
362    })
363}
364
365/// Monitor new data against a Functional Regression Control Chart.
366///
367/// 1. Predicts functional response from FOSR model
368/// 2. Computes residuals
369/// 3. Projects residuals through FPCA
370/// 4. Computes T-squared and SPE
371///
372/// # Arguments
373/// * `chart` - Phase I FRCC chart
374/// * `new_y` - New functional response (n_new x m)
375/// * `new_predictors` - New scalar predictors (n_new x p)
376/// * `argvals` - Grid points (length m)
377///
378/// # Errors
379///
380/// Returns errors from FOSR prediction, FPCA projection, or statistic computation.
381#[must_use = "monitoring result should not be discarded"]
382pub fn frcc_monitor(
383    chart: &FrccChart,
384    new_y: &FdMatrix,
385    new_predictors: &FdMatrix,
386    argvals: &[f64],
387) -> Result<FrccMonitorResult, FdarError> {
388    let m = chart.residual_fpca.mean.len();
389    if new_y.ncols() != m {
390        return Err(FdarError::InvalidDimension {
391            parameter: "new_y",
392            expected: format!("{m} columns"),
393            actual: format!("{} columns", new_y.ncols()),
394        });
395    }
396    if new_y.nrows() != new_predictors.nrows() {
397        return Err(FdarError::InvalidDimension {
398            parameter: "new_predictors",
399            expected: format!("{} rows", new_y.nrows()),
400            actual: format!("{} rows", new_predictors.nrows()),
401        });
402    }
403    // Verify predictor count matches the FOSR model (beta has p rows, one per predictor).
404    let expected_p = chart.fosr.beta.nrows();
405    if new_predictors.ncols() != expected_p {
406        return Err(FdarError::InvalidDimension {
407            parameter: "new_predictors",
408            expected: format!("{expected_p} columns (predictors)"),
409            actual: format!("{} columns", new_predictors.ncols()),
410        });
411    }
412
413    let ncomp = chart.eigenvalues.len();
414
415    // Predict and compute residuals
416    let predicted = predict_fosr(&chart.fosr, new_predictors);
417    let residuals = compute_residuals(new_y, &predicted);
418
419    // Project residuals through FPCA
420    let residual_scores = chart.residual_fpca.project(&residuals)?;
421
422    // T-squared
423    let t2 = hotelling_t2(&residual_scores, &chart.eigenvalues)?;
424
425    // SPE
426    let resid_centered = center_data(&residuals, &chart.residual_fpca.mean);
427    let resid_recon = centered_reconstruct(&chart.residual_fpca, &residual_scores, ncomp);
428    let spe = spe_univariate(&resid_centered, &resid_recon, argvals)?;
429
430    // Alarms
431    let t2_alarm: Vec<bool> = t2.iter().map(|&v| v > chart.t2_limit.ucl).collect();
432    let spe_alarm: Vec<bool> = spe.iter().map(|&v| v > chart.spe_limit.ucl).collect();
433
434    Ok(FrccMonitorResult {
435        t2,
436        spe,
437        t2_alarm,
438        spe_alarm,
439        residual_scores,
440    })
441}