Skip to main content

fdars_core/spm/
profile.rs

1//! Profile monitoring for functional data.
2//!
3//! Monitors the relationship between scalar predictors and functional
4//! responses over time using Function-on-Scalar Regression (FOSR).
5//! Detects changes in the coefficient functions beta(t) via FPCA and T-squared.
6//!
7//! # Mathematical framework
8//!
9//! The functional response model is y_i(t) = x_i^T beta(t) + epsilon_i(t),
10//! where beta(t) = [beta_1(t), ..., beta_p(t)]^T are coefficient functions.
11//! Rolling FOSR estimates beta(t) within each window, producing a sequence
12//! of vectorized coefficient functions. FPCA extracts the dominant modes of
13//! variation in the beta(t) sequence, and T-squared monitors for shifts
14//! in the score distribution.
15//!
16//! Beta coefficients from rolling windows are vectorized column-major:
17//! [beta_1(t_1), ..., beta_1(t_m), beta_2(t_1), ..., beta_2(t_m), ...]
18//! to form the FPCA input matrix. This preserves the functional structure
19//! within each predictor.
20//!
21//! **Note on overlapping windows:** When `step_size < window_size`, consecutive
22//! windows share observations, inducing serial correlation in the beta(t) estimates.
23//! The `effective_n_windows` field in [`ProfileChart`] provides a Bartlett-style
24//! correction for the effective degrees of freedom. Specifically, for overlapping
25//! windows with step_size < window_size, the effective number of independent
26//! windows is reduced. The Bartlett correction n_eff = n_windows / (1 + 2|rho_1|)
27//! accounts for AR(1) autocorrelation rho_1 in the windowed statistics, where
28//! rho_1 is estimated from the lag-1 autocorrelation of the T-squared sequence
29//! (Bartlett, 1946, section 3, pp. 31--33).
30//!
31//! # References
32//!
33//! - Bartlett, M.S. (1946). On the theoretical specification of sampling
34//!   properties of autocorrelated time series. *Journal of the Royal
35//!   Statistical Society B*, 8(1), 27--41, section 3, pp. 31--33.
36//! - Ledolter, J. & Swersey, A.J. (2007). *Testing 1-2-3: Experimental
37//!   design with applications in marketing and service operations*.
38//!   Stanford University Press, Ch. 6 (profile monitoring).
39
40use crate::error::FdarError;
41use crate::function_on_scalar::{fosr, FosrResult};
42use crate::matrix::FdMatrix;
43use crate::regression::{fdata_to_pc_1d, FpcaResult};
44use crate::spm::control::{t2_control_limit, ControlLimit};
45use crate::spm::stats::hotelling_t2;
46
47/// Configuration for profile monitoring.
48///
49/// # Parameter guidance
50///
51/// - `window_size`: Must be >= p + 2 where p is the number of predictors.
52///   Larger windows give more stable beta(t) estimates but reduce temporal resolution.
53///   Typical: 20--50 for slowly varying processes. Recommended `window_size` is
54///   5p to 10p where p is the number of predictors, to ensure stable FOSR
55///   estimation within each window (the design matrix X^T X has condition number
56///   roughly proportional to window_size / p).
57/// - `step_size`: Controls window overlap. step_size = window_size gives no overlap
58///   (independent windows); step_size = 1 gives maximum overlap (smoothest tracking
59///   but highest autocorrelation). Typical: window_size/2 or window_size/4.
60#[derive(Debug, Clone, PartialEq)]
61pub struct ProfileMonitorConfig {
62    /// FOSR smoothing parameter (default 1e-4).
63    pub fosr_lambda: f64,
64    /// Number of principal components for beta-function FPCA (default 3).
65    /// Typically 2--5 suffices since coefficient functions are smoother than
66    /// raw data. Use `select_ncomp()` on the beta eigenvalues for data-driven
67    /// selection.
68    pub ncomp: usize,
69    /// Significance level (default 0.05).
70    pub alpha: f64,
71    /// Window size for rolling FOSR (default 20).
72    pub window_size: usize,
73    /// Step size between windows (default 1).
74    pub step_size: usize,
75}
76
77impl Default for ProfileMonitorConfig {
78    fn default() -> Self {
79        Self {
80            fosr_lambda: 1e-4,
81            ncomp: 3,
82            alpha: 0.05,
83            window_size: 20,
84            step_size: 1,
85        }
86    }
87}
88
89/// Phase I profile monitoring chart.
90///
91/// When `effective_n_windows` is much smaller than the actual number of
92/// windows, the chi-squared UCL may need adjustment. A practical approach:
93/// multiply the UCL by `effective_n_windows / n_windows` to approximate a
94/// Bonferroni-like correction.
95#[derive(Debug, Clone, PartialEq)]
96#[non_exhaustive]
97pub struct ProfileChart {
98    /// FOSR result from the full reference data.
99    pub reference_fosr: FosrResult,
100    /// FPCA of the rolling beta coefficient functions.
101    pub beta_fpca: FpcaResult,
102    /// Eigenvalues: sv² / (n_windows - 1).
103    pub eigenvalues: Vec<f64>,
104    /// T-squared control limit for beta monitoring.
105    pub t2_limit: ControlLimit,
106    /// Lag-1 autocorrelation of the Phase I T-squared statistics from rolling windows.
107    /// High values (> 0.5) indicate serial correlation from window overlap.
108    ///
109    /// Computed from the Phase I T-squared statistic sequence. Values |rho_1| > 0.3
110    /// indicate substantial serial correlation; `effective_n_windows` will be
111    /// notably reduced. The estimator uses the unbiased sample variance (n-1
112    /// denominator) for both variance and covariance terms.
113    pub lag1_autocorrelation: f64,
114    /// Effective number of independent windows (Bartlett correction for overlap).
115    /// When step_size < window_size, consecutive windows overlap and are
116    /// correlated, reducing the effective degrees of freedom.
117    ///
118    /// Computed as n_eff = n_windows / (1 + 2|rho_1|), which is the Bartlett
119    /// (1946, section 3) formula for AR(1) processes. For overlap fraction
120    /// f = 1 - step_size/window_size, the lag-1 autocorrelation is approximately
121    /// f, so n_eff ~ n_windows / (1 + 2f). This is conservative (underestimates
122    /// n_eff) for non-AR(1) dependence structures.
123    ///
124    /// Use this to assess whether the chi-squared UCL is reliable. When
125    /// `effective_n_windows` < 20, consider widening the control limit or
126    /// using bootstrap limits instead.
127    pub effective_n_windows: f64,
128    /// Configuration used.
129    pub config: ProfileMonitorConfig,
130}
131
132/// Result of Phase II profile monitoring.
133#[derive(Debug, Clone, PartialEq)]
134#[non_exhaustive]
135pub struct ProfileMonitorResult {
136    /// Per-window beta coefficient matrices (vectorized).
137    pub betas: FdMatrix,
138    /// T-squared values for each window.
139    pub t2: Vec<f64>,
140    /// T-squared alarm flags.
141    pub t2_alarm: Vec<bool>,
142    /// FPC scores for the beta functions.
143    pub beta_scores: FdMatrix,
144}
145
146/// Build a Phase I profile monitoring chart.
147///
148/// 1. Fits FOSR on the full training data to get reference β(t)
149/// 2. Rolls windows over the data, fitting FOSR per window
150/// 3. Vectorizes the β(t) from each window
151/// 4. Runs FPCA on the vectorized betas
152/// 5. Computes T-squared control limits
153///
154/// When `step_size > window_size`, windows do not overlap and some observations
155/// fall between windows (not monitored).
156///
157/// # Arguments
158/// * `y_curves` - Response functional data (n × m)
159/// * `predictors` - Scalar predictors (n × p)
160/// * `argvals` - Grid points (length m)
161/// * `config` - Profile monitoring configuration
162///
163/// # Errors
164///
165/// Returns errors from FOSR or FPCA computation.
166///
167/// # Example
168/// ```no_run
169/// use fdars_core::matrix::FdMatrix;
170/// use fdars_core::spm::profile::{profile_phase1, ProfileMonitorConfig};
171/// let n = 50;
172/// let m = 10;
173/// let y = FdMatrix::from_column_major(
174///     (0..n*m).map(|i| (i as f64 * 0.1).sin()).collect(), n, m
175/// ).unwrap();
176/// let pred = FdMatrix::from_column_major(
177///     (0..n).map(|i| i as f64 / n as f64).collect(), n, 1
178/// ).unwrap();
179/// let argvals: Vec<f64> = (0..m).map(|i| i as f64 / (m-1) as f64).collect();
180/// let config = ProfileMonitorConfig { window_size: 10, step_size: 5, ncomp: 2, ..ProfileMonitorConfig::default() };
181/// let chart = profile_phase1(&y, &pred, &argvals, &config).unwrap();
182/// assert!(chart.eigenvalues.len() >= 1);
183/// ```
184#[must_use = "expensive computation whose result should not be discarded"]
185pub fn profile_phase1(
186    y_curves: &FdMatrix,
187    predictors: &FdMatrix,
188    argvals: &[f64],
189    config: &ProfileMonitorConfig,
190) -> Result<ProfileChart, FdarError> {
191    let (n, m) = y_curves.shape();
192    if predictors.nrows() != n {
193        return Err(FdarError::InvalidDimension {
194            parameter: "predictors",
195            expected: format!("{n} rows"),
196            actual: format!("{} rows", predictors.nrows()),
197        });
198    }
199    if argvals.len() != m {
200        return Err(FdarError::InvalidDimension {
201            parameter: "argvals",
202            expected: format!("{m}"),
203            actual: format!("{}", argvals.len()),
204        });
205    }
206    if config.step_size == 0 {
207        return Err(FdarError::InvalidParameter {
208            parameter: "step_size",
209            message: "step_size must be at least 1".to_string(),
210        });
211    }
212    if config.window_size < 3 {
213        return Err(FdarError::InvalidParameter {
214            parameter: "window_size",
215            message: format!("window_size must be >= 3, got {}", config.window_size),
216        });
217    }
218    if config.window_size > n {
219        return Err(FdarError::InvalidParameter {
220            parameter: "window_size",
221            message: format!(
222                "window_size ({}) exceeds data size ({n})",
223                config.window_size
224            ),
225        });
226    }
227
228    // Early check: ensure enough windows before expensive FOSR fitting.
229    let expected_n_windows = if n >= config.window_size {
230        (n - config.window_size) / config.step_size + 1
231    } else {
232        0
233    };
234    if expected_n_windows < 4 {
235        return Err(FdarError::InvalidDimension {
236            parameter: "data",
237            expected: "enough data for at least 4 windows".to_string(),
238            actual: format!(
239                "{expected_n_windows} windows (n={n}, window_size={}, step_size={})",
240                config.window_size, config.step_size
241            ),
242        });
243    }
244
245    // Fit reference FOSR on full data
246    let reference_fosr = fosr(y_curves, predictors, config.fosr_lambda)?;
247
248    // Rolling windows: extract per-window FOSR betas
249    let beta_vecs = rolling_betas(y_curves, predictors, config)?;
250    let n_windows = beta_vecs.nrows();
251
252    if n_windows < 4 {
253        return Err(FdarError::InvalidDimension {
254            parameter: "data",
255            expected: "enough data for at least 4 windows".to_string(),
256            actual: format!("{n_windows} windows"),
257        });
258    }
259
260    // FPCA on vectorized betas
261    let ncomp = config.ncomp.min(n_windows - 1).min(beta_vecs.ncols());
262    let beta_fpca = fdata_to_pc_1d(&beta_vecs, ncomp)?;
263    let actual_ncomp = beta_fpca.scores.ncols();
264
265    // Eigenvalues
266    let eigenvalues: Vec<f64> = beta_fpca
267        .singular_values
268        .iter()
269        .take(actual_ncomp)
270        .map(|&sv| sv * sv / (n_windows as f64 - 1.0))
271        .collect();
272
273    // Compute Phase I T² for autocorrelation diagnostic
274    let phase1_t2 = hotelling_t2(&beta_fpca.scores, &eigenvalues)?;
275    // Lag-1 autocorrelation using unbiased sample variance (n-1 denominator).
276    // Both variance and covariance use the (n-1) denominator. The ratio
277    // ρ₁ = cov/var is invariant to this choice, but (n-1) gives the unbiased
278    // sample variance.
279    let lag1_autocorrelation = if phase1_t2.len() > 2 {
280        let n_t2 = phase1_t2.len();
281        let mean_t2 = phase1_t2.iter().sum::<f64>() / n_t2 as f64;
282        let var_t2: f64 = phase1_t2
283            .iter()
284            .map(|&v| (v - mean_t2).powi(2))
285            .sum::<f64>()
286            / (n_t2 - 1) as f64;
287        if var_t2 > 0.0 {
288            let cov1: f64 = (0..n_t2 - 1)
289                .map(|i| (phase1_t2[i] - mean_t2) * (phase1_t2[i + 1] - mean_t2))
290                .sum::<f64>()
291                / (n_t2 - 1) as f64;
292            (cov1 / var_t2).clamp(-1.0, 1.0)
293        } else {
294            0.0
295        }
296    } else {
297        0.0
298    };
299
300    // Bartlett (1946) effective sample size: n_eff = n / (1 + 2|ρ₁|).
301    // See Bartlett, M.S. (1946), Section 3.
302    // The Bartlett (1946) formula n_eff = n/(1 + 2ρ₁) is derived for
303    // AR(1) processes and provides a first-order correction for general
304    // stationary processes. For rolling-window statistics with overlap
305    // fraction f = 1 - step_size/window_size, the lag-1 autocorrelation
306    // is approximately f, so n_eff ≈ n/(1 + 2f). This is conservative
307    // (underestimates n_eff) for non-AR(1) dependence structures.
308    let effective_n_windows = if lag1_autocorrelation.abs() > 0.01 {
309        // Use absolute value: both positive (overlapping windows) and negative
310        // (anti-correlated) autocorrelation reduce the effective degrees of freedom.
311        let bartlett_factor = (1.0 + 2.0 * lag1_autocorrelation.abs()).max(1.0);
312        (n_windows as f64 / bartlett_factor).max(2.0)
313    } else {
314        n_windows as f64
315    };
316
317    // Control limit
318    let t2_limit = t2_control_limit(actual_ncomp, config.alpha)?;
319
320    Ok(ProfileChart {
321        reference_fosr,
322        beta_fpca,
323        eigenvalues,
324        t2_limit,
325        lag1_autocorrelation,
326        effective_n_windows,
327        config: config.clone(),
328    })
329}
330
331/// Monitor new data against a Phase I profile chart.
332///
333/// 1. Fits FOSR per rolling window on new data
334/// 2. Vectorizes β(t) and projects onto Phase I beta-FPCA
335/// 3. Computes T-squared
336///
337/// # Arguments
338/// * `chart` - Phase I profile chart
339/// * `new_y` - New response functional data (n × m)
340/// * `new_predictors` - New scalar predictors (n × p)
341/// * `argvals` - Grid points (length m)
342/// * `config` - Profile monitoring configuration
343///
344/// # Errors
345///
346/// Returns errors from FOSR or projection.
347#[must_use = "monitoring result should not be discarded"]
348pub fn profile_monitor(
349    chart: &ProfileChart,
350    new_y: &FdMatrix,
351    new_predictors: &FdMatrix,
352    _argvals: &[f64],
353    config: &ProfileMonitorConfig,
354) -> Result<ProfileMonitorResult, FdarError> {
355    if config.step_size == 0 {
356        return Err(FdarError::InvalidParameter {
357            parameter: "step_size",
358            message: "step_size must be at least 1".to_string(),
359        });
360    }
361    let n = new_y.nrows();
362    if new_predictors.nrows() != n {
363        return Err(FdarError::InvalidDimension {
364            parameter: "new_predictors",
365            expected: format!("{n} rows"),
366            actual: format!("{} rows", new_predictors.nrows()),
367        });
368    }
369    // Validate that new_y grid size matches the reference FOSR grid (m)
370    let expected_m = chart.reference_fosr.beta.ncols();
371    if new_y.ncols() != expected_m {
372        return Err(FdarError::InvalidDimension {
373            parameter: "new_y",
374            expected: format!("{expected_m} columns (grid points)"),
375            actual: format!("{} columns", new_y.ncols()),
376        });
377    }
378
379    // Rolling windows on new data
380    let beta_vecs = rolling_betas(new_y, new_predictors, config)?;
381
382    // Project onto Phase I FPCA
383    let beta_scores = chart.beta_fpca.project(&beta_vecs)?;
384
385    // T-squared
386    let t2 = hotelling_t2(&beta_scores, &chart.eigenvalues)?;
387
388    // Alarms
389    let t2_alarm: Vec<bool> = t2.iter().map(|&v| v > chart.t2_limit.ucl).collect();
390
391    Ok(ProfileMonitorResult {
392        betas: beta_vecs,
393        t2,
394        t2_alarm,
395        beta_scores,
396    })
397}
398
399/// Extract vectorized FOSR betas from rolling windows.
400///
401/// Each window must have sufficient rank for FOSR fitting. If a window fails
402/// (e.g., due to collinear predictors), the function returns an error.
403/// Ensure `window_size` is large enough relative to `p` (number of predictors).
404fn rolling_betas(
405    y_curves: &FdMatrix,
406    predictors: &FdMatrix,
407    config: &ProfileMonitorConfig,
408) -> Result<FdMatrix, FdarError> {
409    let n = y_curves.nrows();
410    let m = y_curves.ncols();
411    let p = predictors.ncols();
412
413    if config.window_size < p + 2 {
414        return Err(FdarError::InvalidParameter {
415            parameter: "window_size",
416            message: format!(
417                "window_size ({}) must be >= p + 2 = {} for FOSR fitting",
418                config.window_size,
419                p + 2
420            ),
421        });
422    }
423
424    let mut windows = Vec::new();
425    let mut start = 0;
426    while start + config.window_size <= n {
427        windows.push(start);
428        start += config.step_size;
429    }
430
431    if windows.is_empty() {
432        return Err(FdarError::InvalidDimension {
433            parameter: "data",
434            expected: format!(
435                "at least {} observations for one window",
436                config.window_size
437            ),
438            actual: format!("{n} observations"),
439        });
440    }
441
442    // Beta coefficients are vectorized in row-major order:
443    // β₁(t₁),...,β₁(t_m),β₂(t₁),...,β₂(t_m).
444    // Each row of beta_mat represents one window's complete coefficient function.
445    let beta_len = p * m;
446    let n_windows = windows.len();
447    let mut beta_mat = FdMatrix::zeros(n_windows, beta_len);
448
449    for (w_idx, &win_start) in windows.iter().enumerate() {
450        // Extract window data
451        let mut y_window = FdMatrix::zeros(config.window_size, m);
452        let mut pred_window = FdMatrix::zeros(config.window_size, p);
453        for i in 0..config.window_size {
454            for j in 0..m {
455                y_window[(i, j)] = y_curves[(win_start + i, j)];
456            }
457            for j in 0..p {
458                pred_window[(i, j)] = predictors[(win_start + i, j)];
459            }
460        }
461
462        // Fit FOSR
463        let fosr_result = fosr(&y_window, &pred_window, config.fosr_lambda)?;
464
465        // Vectorize beta (p × m) into row of beta_mat
466        // fosr_result.beta is p × m where row j = βⱼ(t)
467        for j in 0..p {
468            for t in 0..m {
469                beta_mat[(w_idx, j * m + t)] = fosr_result.beta[(j, t)];
470            }
471        }
472    }
473
474    Ok(beta_mat)
475}