Skip to main content

fdars_core/spm/
profile.rs

1//! Profile monitoring for functional data.
2//!
3//! Monitors the relationship between scalar predictors and functional
4//! responses over time using Function-on-Scalar Regression (FOSR).
5//! Detects changes in the coefficient functions beta(t) via FPCA and T-squared.
6//!
7//! # Mathematical framework
8//!
9//! The functional response model is y_i(t) = x_i^T beta(t) + epsilon_i(t),
10//! where beta(t) = [beta_1(t), ..., beta_p(t)]^T are coefficient functions.
11//! Rolling FOSR estimates beta(t) within each window, producing a sequence
12//! of vectorized coefficient functions. FPCA extracts the dominant modes of
13//! variation in the beta(t) sequence, and T-squared monitors for shifts
14//! in the score distribution.
15//!
16//! Beta coefficients from rolling windows are vectorized column-major:
17//! [beta_1(t_1), ..., beta_1(t_m), beta_2(t_1), ..., beta_2(t_m), ...]
18//! to form the FPCA input matrix. This preserves the functional structure
19//! within each predictor.
20//!
21//! **Note on overlapping windows:** When `step_size < window_size`, consecutive
22//! windows share observations, inducing serial correlation in the beta(t) estimates.
23//! The `effective_n_windows` field in [`ProfileChart`] provides a Bartlett-style
24//! correction for the effective degrees of freedom. Specifically, for overlapping
25//! windows with step_size < window_size, the effective number of independent
26//! windows is reduced. The Bartlett correction n_eff = n_windows / (1 + 2|rho_1|)
27//! accounts for AR(1) autocorrelation rho_1 in the windowed statistics, where
28//! rho_1 is estimated from the lag-1 autocorrelation of the T-squared sequence
29//! (Bartlett, 1946, section 3, pp. 31--33).
30//!
31//! # References
32//!
33//! - Bartlett, M.S. (1946). On the theoretical specification of sampling
34//!   properties of autocorrelated time series. *Journal of the Royal
35//!   Statistical Society B*, 8(1), 27--41, section 3, pp. 31--33.
36//! - Ledolter, J. & Swersey, A.J. (2007). *Testing 1-2-3: Experimental
37//!   design with applications in marketing and service operations*.
38//!   Stanford University Press, Ch. 6 (profile monitoring).
39
40use crate::error::FdarError;
41use crate::function_on_scalar::{fosr, FosrResult};
42use crate::matrix::FdMatrix;
43use crate::regression::{fdata_to_pc_1d, FpcaResult};
44use crate::spm::control::{t2_control_limit, ControlLimit};
45use crate::spm::stats::hotelling_t2;
46
47/// Configuration for profile monitoring.
48///
49/// # Parameter guidance
50///
51/// - `window_size`: Must be >= p + 2 where p is the number of predictors.
52///   Larger windows give more stable beta(t) estimates but reduce temporal resolution.
53///   Typical: 20--50 for slowly varying processes. Recommended `window_size` is
54///   5p to 10p where p is the number of predictors, to ensure stable FOSR
55///   estimation within each window (the design matrix X^T X has condition number
56///   roughly proportional to window_size / p).
57/// - `step_size`: Controls window overlap. step_size = window_size gives no overlap
58///   (independent windows); step_size = 1 gives maximum overlap (smoothest tracking
59///   but highest autocorrelation). Typical: window_size/2 or window_size/4.
60#[derive(Debug, Clone, PartialEq)]
61pub struct ProfileMonitorConfig {
62    /// FOSR smoothing parameter (default 1e-4).
63    pub fosr_lambda: f64,
64    /// Number of principal components for beta-function FPCA (default 3).
65    /// Typically 2--5 suffices since coefficient functions are smoother than
66    /// raw data. Use `select_ncomp()` on the beta eigenvalues for data-driven
67    /// selection.
68    pub ncomp: usize,
69    /// Significance level (default 0.05).
70    pub alpha: f64,
71    /// Window size for rolling FOSR (default 20).
72    pub window_size: usize,
73    /// Step size between windows (default 1).
74    pub step_size: usize,
75}
76
77impl Default for ProfileMonitorConfig {
78    fn default() -> Self {
79        Self {
80            fosr_lambda: 1e-4,
81            ncomp: 3,
82            alpha: 0.05,
83            window_size: 20,
84            step_size: 1,
85        }
86    }
87}
88
89/// Phase I profile monitoring chart.
90///
91/// When `effective_n_windows` is much smaller than the actual number of
92/// windows, the chi-squared UCL may need adjustment. A practical approach:
93/// multiply the UCL by `effective_n_windows / n_windows` to approximate a
94/// Bonferroni-like correction.
95#[derive(Debug, Clone, PartialEq)]
96#[non_exhaustive]
97pub struct ProfileChart {
98    /// FOSR result from the full reference data.
99    pub reference_fosr: FosrResult,
100    /// FPCA of the rolling beta coefficient functions.
101    pub beta_fpca: FpcaResult,
102    /// Eigenvalues: sv² / (n_windows - 1).
103    pub eigenvalues: Vec<f64>,
104    /// T-squared control limit for beta monitoring.
105    pub t2_limit: ControlLimit,
106    /// Lag-1 autocorrelation of the Phase I T-squared statistics from rolling windows.
107    /// High values (> 0.5) indicate serial correlation from window overlap.
108    ///
109    /// Computed from the Phase I T-squared statistic sequence. Values |rho_1| > 0.3
110    /// indicate substantial serial correlation; `effective_n_windows` will be
111    /// notably reduced. The estimator uses the unbiased sample variance (n-1
112    /// denominator) for both variance and covariance terms.
113    pub lag1_autocorrelation: f64,
114    /// Effective number of independent windows (Bartlett correction for overlap).
115    /// When step_size < window_size, consecutive windows overlap and are
116    /// correlated, reducing the effective degrees of freedom.
117    ///
118    /// Computed as n_eff = n_windows / (1 + 2|rho_1|), which is the Bartlett
119    /// (1946, section 3) formula for AR(1) processes. For overlap fraction
120    /// f = 1 - step_size/window_size, the lag-1 autocorrelation is approximately
121    /// f, so n_eff ~ n_windows / (1 + 2f). This is conservative (underestimates
122    /// n_eff) for non-AR(1) dependence structures.
123    ///
124    /// Use this to assess whether the chi-squared UCL is reliable. When
125    /// `effective_n_windows` < 20, consider widening the control limit or
126    /// using bootstrap limits instead.
127    pub effective_n_windows: f64,
128    /// Configuration used.
129    pub config: ProfileMonitorConfig,
130}
131
132/// Result of Phase II profile monitoring.
133#[derive(Debug, Clone, PartialEq)]
134#[non_exhaustive]
135pub struct ProfileMonitorResult {
136    /// Per-window beta coefficient matrices (vectorized).
137    pub betas: FdMatrix,
138    /// T-squared values for each window.
139    pub t2: Vec<f64>,
140    /// T-squared alarm flags.
141    pub t2_alarm: Vec<bool>,
142    /// FPC scores for the beta functions.
143    pub beta_scores: FdMatrix,
144}
145
146/// Build a Phase I profile monitoring chart.
147///
148/// 1. Fits FOSR on the full training data to get reference β(t)
149/// 2. Rolls windows over the data, fitting FOSR per window
150/// 3. Vectorizes the β(t) from each window
151/// 4. Runs FPCA on the vectorized betas
152/// 5. Computes T-squared control limits
153///
154/// When `step_size > window_size`, windows do not overlap and some observations
155/// fall between windows (not monitored).
156///
157/// # Arguments
158/// * `y_curves` - Response functional data (n × m)
159/// * `predictors` - Scalar predictors (n × p)
160/// * `argvals` - Grid points (length m)
161/// * `config` - Profile monitoring configuration
162///
163/// # Errors
164///
165/// Returns errors from FOSR or FPCA computation.
166///
167/// # Example
168/// ```no_run
169/// use fdars_core::matrix::FdMatrix;
170/// use fdars_core::spm::profile::{profile_phase1, ProfileMonitorConfig};
171/// let n = 50;
172/// let m = 10;
173/// let y = FdMatrix::from_column_major(
174///     (0..n*m).map(|i| (i as f64 * 0.1).sin()).collect(), n, m
175/// ).unwrap();
176/// let pred = FdMatrix::from_column_major(
177///     (0..n).map(|i| i as f64 / n as f64).collect(), n, 1
178/// ).unwrap();
179/// let argvals: Vec<f64> = (0..m).map(|i| i as f64 / (m-1) as f64).collect();
180/// let config = ProfileMonitorConfig { window_size: 10, step_size: 5, ncomp: 2, ..ProfileMonitorConfig::default() };
181/// let chart = profile_phase1(&y, &pred, &argvals, &config).unwrap();
182/// assert!(chart.eigenvalues.len() >= 1);
183/// ```
184#[must_use = "expensive computation whose result should not be discarded"]
185pub fn profile_phase1(
186    y_curves: &FdMatrix,
187    predictors: &FdMatrix,
188    argvals: &[f64],
189    config: &ProfileMonitorConfig,
190) -> Result<ProfileChart, FdarError> {
191    let (n, m) = y_curves.shape();
192    if predictors.nrows() != n {
193        return Err(FdarError::InvalidDimension {
194            parameter: "predictors",
195            expected: format!("{n} rows"),
196            actual: format!("{} rows", predictors.nrows()),
197        });
198    }
199    if argvals.len() != m {
200        return Err(FdarError::InvalidDimension {
201            parameter: "argvals",
202            expected: format!("{m}"),
203            actual: format!("{}", argvals.len()),
204        });
205    }
206    if config.step_size == 0 {
207        return Err(FdarError::InvalidParameter {
208            parameter: "step_size",
209            message: "step_size must be at least 1".to_string(),
210        });
211    }
212    if config.window_size < 3 {
213        return Err(FdarError::InvalidParameter {
214            parameter: "window_size",
215            message: format!("window_size must be >= 3, got {}", config.window_size),
216        });
217    }
218    if config.window_size > n {
219        return Err(FdarError::InvalidParameter {
220            parameter: "window_size",
221            message: format!(
222                "window_size ({}) exceeds data size ({n})",
223                config.window_size
224            ),
225        });
226    }
227
228    // Early check: ensure enough windows before expensive FOSR fitting.
229    let expected_n_windows = if n >= config.window_size {
230        (n - config.window_size) / config.step_size + 1
231    } else {
232        0
233    };
234    if expected_n_windows < 4 {
235        return Err(FdarError::InvalidDimension {
236            parameter: "data",
237            expected: "enough data for at least 4 windows".to_string(),
238            actual: format!(
239                "{expected_n_windows} windows (n={n}, window_size={}, step_size={})",
240                config.window_size, config.step_size
241            ),
242        });
243    }
244
245    // Fit reference FOSR on full data
246    let reference_fosr = fosr(y_curves, predictors, config.fosr_lambda)?;
247
248    // Rolling windows: extract per-window FOSR betas
249    let beta_vecs = rolling_betas(y_curves, predictors, config)?;
250    let n_windows = beta_vecs.nrows();
251
252    if n_windows < 4 {
253        return Err(FdarError::InvalidDimension {
254            parameter: "data",
255            expected: "enough data for at least 4 windows".to_string(),
256            actual: format!("{n_windows} windows"),
257        });
258    }
259
260    // FPCA on vectorized betas
261    let ncomp = config.ncomp.min(n_windows - 1).min(beta_vecs.ncols());
262    let beta_m = beta_vecs.ncols();
263    let beta_argvals: Vec<f64> = (0..beta_m)
264        .map(|j| j as f64 / (beta_m - 1).max(1) as f64)
265        .collect();
266    let beta_fpca = fdata_to_pc_1d(&beta_vecs, ncomp, &beta_argvals)?;
267    let actual_ncomp = beta_fpca.scores.ncols();
268
269    // Eigenvalues
270    let eigenvalues: Vec<f64> = beta_fpca
271        .singular_values
272        .iter()
273        .take(actual_ncomp)
274        .map(|&sv| sv * sv / (n_windows as f64 - 1.0))
275        .collect();
276
277    // Compute Phase I T² for autocorrelation diagnostic
278    let phase1_t2 = hotelling_t2(&beta_fpca.scores, &eigenvalues)?;
279    // Lag-1 autocorrelation using unbiased sample variance (n-1 denominator).
280    // Both variance and covariance use the (n-1) denominator. The ratio
281    // ρ₁ = cov/var is invariant to this choice, but (n-1) gives the unbiased
282    // sample variance.
283    let lag1_autocorrelation = if phase1_t2.len() > 2 {
284        let n_t2 = phase1_t2.len();
285        let mean_t2 = phase1_t2.iter().sum::<f64>() / n_t2 as f64;
286        let var_t2: f64 = phase1_t2
287            .iter()
288            .map(|&v| (v - mean_t2).powi(2))
289            .sum::<f64>()
290            / (n_t2 - 1) as f64;
291        if var_t2 > 0.0 {
292            let cov1: f64 = (0..n_t2 - 1)
293                .map(|i| (phase1_t2[i] - mean_t2) * (phase1_t2[i + 1] - mean_t2))
294                .sum::<f64>()
295                / (n_t2 - 1) as f64;
296            (cov1 / var_t2).clamp(-1.0, 1.0)
297        } else {
298            0.0
299        }
300    } else {
301        0.0
302    };
303
304    // Bartlett (1946) effective sample size: n_eff = n / (1 + 2|ρ₁|).
305    // See Bartlett, M.S. (1946), Section 3.
306    // The Bartlett (1946) formula n_eff = n/(1 + 2ρ₁) is derived for
307    // AR(1) processes and provides a first-order correction for general
308    // stationary processes. For rolling-window statistics with overlap
309    // fraction f = 1 - step_size/window_size, the lag-1 autocorrelation
310    // is approximately f, so n_eff ≈ n/(1 + 2f). This is conservative
311    // (underestimates n_eff) for non-AR(1) dependence structures.
312    let effective_n_windows = if lag1_autocorrelation.abs() > 0.01 {
313        // Use absolute value: both positive (overlapping windows) and negative
314        // (anti-correlated) autocorrelation reduce the effective degrees of freedom.
315        let bartlett_factor = (1.0 + 2.0 * lag1_autocorrelation.abs()).max(1.0);
316        (n_windows as f64 / bartlett_factor).max(2.0)
317    } else {
318        n_windows as f64
319    };
320
321    // Control limit
322    let t2_limit = t2_control_limit(actual_ncomp, config.alpha)?;
323
324    Ok(ProfileChart {
325        reference_fosr,
326        beta_fpca,
327        eigenvalues,
328        t2_limit,
329        lag1_autocorrelation,
330        effective_n_windows,
331        config: config.clone(),
332    })
333}
334
335/// Monitor new data against a Phase I profile chart.
336///
337/// 1. Fits FOSR per rolling window on new data
338/// 2. Vectorizes β(t) and projects onto Phase I beta-FPCA
339/// 3. Computes T-squared
340///
341/// # Arguments
342/// * `chart` - Phase I profile chart
343/// * `new_y` - New response functional data (n × m)
344/// * `new_predictors` - New scalar predictors (n × p)
345/// * `argvals` - Grid points (length m)
346/// * `config` - Profile monitoring configuration
347///
348/// # Errors
349///
350/// Returns errors from FOSR or projection.
351#[must_use = "monitoring result should not be discarded"]
352pub fn profile_monitor(
353    chart: &ProfileChart,
354    new_y: &FdMatrix,
355    new_predictors: &FdMatrix,
356    _argvals: &[f64],
357    config: &ProfileMonitorConfig,
358) -> Result<ProfileMonitorResult, FdarError> {
359    if config.step_size == 0 {
360        return Err(FdarError::InvalidParameter {
361            parameter: "step_size",
362            message: "step_size must be at least 1".to_string(),
363        });
364    }
365    let n = new_y.nrows();
366    if new_predictors.nrows() != n {
367        return Err(FdarError::InvalidDimension {
368            parameter: "new_predictors",
369            expected: format!("{n} rows"),
370            actual: format!("{} rows", new_predictors.nrows()),
371        });
372    }
373    // Validate that new_y grid size matches the reference FOSR grid (m)
374    let expected_m = chart.reference_fosr.beta.ncols();
375    if new_y.ncols() != expected_m {
376        return Err(FdarError::InvalidDimension {
377            parameter: "new_y",
378            expected: format!("{expected_m} columns (grid points)"),
379            actual: format!("{} columns", new_y.ncols()),
380        });
381    }
382
383    // Rolling windows on new data
384    let beta_vecs = rolling_betas(new_y, new_predictors, config)?;
385
386    // Project onto Phase I FPCA
387    let beta_scores = chart.beta_fpca.project(&beta_vecs)?;
388
389    // T-squared
390    let t2 = hotelling_t2(&beta_scores, &chart.eigenvalues)?;
391
392    // Alarms
393    let t2_alarm: Vec<bool> = t2.iter().map(|&v| v > chart.t2_limit.ucl).collect();
394
395    Ok(ProfileMonitorResult {
396        betas: beta_vecs,
397        t2,
398        t2_alarm,
399        beta_scores,
400    })
401}
402
403/// Extract vectorized FOSR betas from rolling windows.
404///
405/// Each window must have sufficient rank for FOSR fitting. If a window fails
406/// (e.g., due to collinear predictors), the function returns an error.
407/// Ensure `window_size` is large enough relative to `p` (number of predictors).
408fn rolling_betas(
409    y_curves: &FdMatrix,
410    predictors: &FdMatrix,
411    config: &ProfileMonitorConfig,
412) -> Result<FdMatrix, FdarError> {
413    let n = y_curves.nrows();
414    let m = y_curves.ncols();
415    let p = predictors.ncols();
416
417    if config.window_size < p + 2 {
418        return Err(FdarError::InvalidParameter {
419            parameter: "window_size",
420            message: format!(
421                "window_size ({}) must be >= p + 2 = {} for FOSR fitting",
422                config.window_size,
423                p + 2
424            ),
425        });
426    }
427
428    let mut windows = Vec::new();
429    let mut start = 0;
430    while start + config.window_size <= n {
431        windows.push(start);
432        start += config.step_size;
433    }
434
435    if windows.is_empty() {
436        return Err(FdarError::InvalidDimension {
437            parameter: "data",
438            expected: format!(
439                "at least {} observations for one window",
440                config.window_size
441            ),
442            actual: format!("{n} observations"),
443        });
444    }
445
446    // Beta coefficients are vectorized in row-major order:
447    // β₁(t₁),...,β₁(t_m),β₂(t₁),...,β₂(t_m).
448    // Each row of beta_mat represents one window's complete coefficient function.
449    let beta_len = p * m;
450    let n_windows = windows.len();
451    let mut beta_mat = FdMatrix::zeros(n_windows, beta_len);
452
453    for (w_idx, &win_start) in windows.iter().enumerate() {
454        // Extract window data
455        let mut y_window = FdMatrix::zeros(config.window_size, m);
456        let mut pred_window = FdMatrix::zeros(config.window_size, p);
457        for i in 0..config.window_size {
458            for j in 0..m {
459                y_window[(i, j)] = y_curves[(win_start + i, j)];
460            }
461            for j in 0..p {
462                pred_window[(i, j)] = predictors[(win_start + i, j)];
463            }
464        }
465
466        // Fit FOSR
467        let fosr_result = fosr(&y_window, &pred_window, config.fosr_lambda)?;
468
469        // Vectorize beta (p × m) into row of beta_mat
470        // fosr_result.beta is p × m where row j = βⱼ(t)
471        for j in 0..p {
472            for t in 0..m {
473                beta_mat[(w_idx, j * m + t)] = fosr_result.beta[(j, t)];
474            }
475        }
476    }
477
478    Ok(beta_mat)
479}