fdars_core/spm/profile.rs
1//! Profile monitoring for functional data.
2//!
3//! Monitors the relationship between scalar predictors and functional
4//! responses over time using Function-on-Scalar Regression (FOSR).
5//! Detects changes in the coefficient functions beta(t) via FPCA and T-squared.
6//!
7//! # Mathematical framework
8//!
9//! The functional response model is y_i(t) = x_i^T beta(t) + epsilon_i(t),
10//! where beta(t) = [beta_1(t), ..., beta_p(t)]^T are coefficient functions.
11//! Rolling FOSR estimates beta(t) within each window, producing a sequence
12//! of vectorized coefficient functions. FPCA extracts the dominant modes of
13//! variation in the beta(t) sequence, and T-squared monitors for shifts
14//! in the score distribution.
15//!
16//! Beta coefficients from rolling windows are vectorized column-major:
17//! [beta_1(t_1), ..., beta_1(t_m), beta_2(t_1), ..., beta_2(t_m), ...]
18//! to form the FPCA input matrix. This preserves the functional structure
19//! within each predictor.
20//!
21//! **Note on overlapping windows:** When `step_size < window_size`, consecutive
22//! windows share observations, inducing serial correlation in the beta(t) estimates.
23//! The `effective_n_windows` field in [`ProfileChart`] provides a Bartlett-style
24//! correction for the effective degrees of freedom. Specifically, for overlapping
25//! windows with step_size < window_size, the effective number of independent
26//! windows is reduced. The Bartlett correction n_eff = n_windows / (1 + 2|rho_1|)
27//! accounts for AR(1) autocorrelation rho_1 in the windowed statistics, where
28//! rho_1 is estimated from the lag-1 autocorrelation of the T-squared sequence
29//! (Bartlett, 1946, section 3, pp. 31--33).
30//!
31//! # References
32//!
33//! - Bartlett, M.S. (1946). On the theoretical specification of sampling
34//! properties of autocorrelated time series. *Journal of the Royal
35//! Statistical Society B*, 8(1), 27--41, section 3, pp. 31--33.
36//! - Ledolter, J. & Swersey, A.J. (2007). *Testing 1-2-3: Experimental
37//! design with applications in marketing and service operations*.
38//! Stanford University Press, Ch. 6 (profile monitoring).
39
40use crate::error::FdarError;
41use crate::function_on_scalar::{fosr, FosrResult};
42use crate::matrix::FdMatrix;
43use crate::regression::{fdata_to_pc_1d, FpcaResult};
44use crate::spm::control::{t2_control_limit, ControlLimit};
45use crate::spm::stats::hotelling_t2;
46
47/// Configuration for profile monitoring.
48///
49/// # Parameter guidance
50///
51/// - `window_size`: Must be >= p + 2 where p is the number of predictors.
52/// Larger windows give more stable beta(t) estimates but reduce temporal resolution.
53/// Typical: 20--50 for slowly varying processes. Recommended `window_size` is
54/// 5p to 10p where p is the number of predictors, to ensure stable FOSR
55/// estimation within each window (the design matrix X^T X has condition number
56/// roughly proportional to window_size / p).
57/// - `step_size`: Controls window overlap. step_size = window_size gives no overlap
58/// (independent windows); step_size = 1 gives maximum overlap (smoothest tracking
59/// but highest autocorrelation). Typical: window_size/2 or window_size/4.
60#[derive(Debug, Clone, PartialEq)]
61pub struct ProfileMonitorConfig {
62 /// FOSR smoothing parameter (default 1e-4).
63 pub fosr_lambda: f64,
64 /// Number of principal components for beta-function FPCA (default 3).
65 /// Typically 2--5 suffices since coefficient functions are smoother than
66 /// raw data. Use `select_ncomp()` on the beta eigenvalues for data-driven
67 /// selection.
68 pub ncomp: usize,
69 /// Significance level (default 0.05).
70 pub alpha: f64,
71 /// Window size for rolling FOSR (default 20).
72 pub window_size: usize,
73 /// Step size between windows (default 1).
74 pub step_size: usize,
75}
76
77impl Default for ProfileMonitorConfig {
78 fn default() -> Self {
79 Self {
80 fosr_lambda: 1e-4,
81 ncomp: 3,
82 alpha: 0.05,
83 window_size: 20,
84 step_size: 1,
85 }
86 }
87}
88
89/// Phase I profile monitoring chart.
90///
91/// When `effective_n_windows` is much smaller than the actual number of
92/// windows, the chi-squared UCL may need adjustment. A practical approach:
93/// multiply the UCL by `effective_n_windows / n_windows` to approximate a
94/// Bonferroni-like correction.
95#[derive(Debug, Clone, PartialEq)]
96#[non_exhaustive]
97pub struct ProfileChart {
98 /// FOSR result from the full reference data.
99 pub reference_fosr: FosrResult,
100 /// FPCA of the rolling beta coefficient functions.
101 pub beta_fpca: FpcaResult,
102 /// Eigenvalues: sv² / (n_windows - 1).
103 pub eigenvalues: Vec<f64>,
104 /// T-squared control limit for beta monitoring.
105 pub t2_limit: ControlLimit,
106 /// Lag-1 autocorrelation of the Phase I T-squared statistics from rolling windows.
107 /// High values (> 0.5) indicate serial correlation from window overlap.
108 ///
109 /// Computed from the Phase I T-squared statistic sequence. Values |rho_1| > 0.3
110 /// indicate substantial serial correlation; `effective_n_windows` will be
111 /// notably reduced. The estimator uses the unbiased sample variance (n-1
112 /// denominator) for both variance and covariance terms.
113 pub lag1_autocorrelation: f64,
114 /// Effective number of independent windows (Bartlett correction for overlap).
115 /// When step_size < window_size, consecutive windows overlap and are
116 /// correlated, reducing the effective degrees of freedom.
117 ///
118 /// Computed as n_eff = n_windows / (1 + 2|rho_1|), which is the Bartlett
119 /// (1946, section 3) formula for AR(1) processes. For overlap fraction
120 /// f = 1 - step_size/window_size, the lag-1 autocorrelation is approximately
121 /// f, so n_eff ~ n_windows / (1 + 2f). This is conservative (underestimates
122 /// n_eff) for non-AR(1) dependence structures.
123 ///
124 /// Use this to assess whether the chi-squared UCL is reliable. When
125 /// `effective_n_windows` < 20, consider widening the control limit or
126 /// using bootstrap limits instead.
127 pub effective_n_windows: f64,
128 /// Configuration used.
129 pub config: ProfileMonitorConfig,
130}
131
132/// Result of Phase II profile monitoring.
133#[derive(Debug, Clone, PartialEq)]
134#[non_exhaustive]
135pub struct ProfileMonitorResult {
136 /// Per-window beta coefficient matrices (vectorized).
137 pub betas: FdMatrix,
138 /// T-squared values for each window.
139 pub t2: Vec<f64>,
140 /// T-squared alarm flags.
141 pub t2_alarm: Vec<bool>,
142 /// FPC scores for the beta functions.
143 pub beta_scores: FdMatrix,
144}
145
146/// Build a Phase I profile monitoring chart.
147///
148/// 1. Fits FOSR on the full training data to get reference β(t)
149/// 2. Rolls windows over the data, fitting FOSR per window
150/// 3. Vectorizes the β(t) from each window
151/// 4. Runs FPCA on the vectorized betas
152/// 5. Computes T-squared control limits
153///
154/// When `step_size > window_size`, windows do not overlap and some observations
155/// fall between windows (not monitored).
156///
157/// # Arguments
158/// * `y_curves` - Response functional data (n × m)
159/// * `predictors` - Scalar predictors (n × p)
160/// * `argvals` - Grid points (length m)
161/// * `config` - Profile monitoring configuration
162///
163/// # Errors
164///
165/// Returns errors from FOSR or FPCA computation.
166///
167/// # Example
168/// ```no_run
169/// use fdars_core::matrix::FdMatrix;
170/// use fdars_core::spm::profile::{profile_phase1, ProfileMonitorConfig};
171/// let n = 50;
172/// let m = 10;
173/// let y = FdMatrix::from_column_major(
174/// (0..n*m).map(|i| (i as f64 * 0.1).sin()).collect(), n, m
175/// ).unwrap();
176/// let pred = FdMatrix::from_column_major(
177/// (0..n).map(|i| i as f64 / n as f64).collect(), n, 1
178/// ).unwrap();
179/// let argvals: Vec<f64> = (0..m).map(|i| i as f64 / (m-1) as f64).collect();
180/// let config = ProfileMonitorConfig { window_size: 10, step_size: 5, ncomp: 2, ..ProfileMonitorConfig::default() };
181/// let chart = profile_phase1(&y, &pred, &argvals, &config).unwrap();
182/// assert!(chart.eigenvalues.len() >= 1);
183/// ```
184#[must_use = "expensive computation whose result should not be discarded"]
185pub fn profile_phase1(
186 y_curves: &FdMatrix,
187 predictors: &FdMatrix,
188 argvals: &[f64],
189 config: &ProfileMonitorConfig,
190) -> Result<ProfileChart, FdarError> {
191 let (n, m) = y_curves.shape();
192 if predictors.nrows() != n {
193 return Err(FdarError::InvalidDimension {
194 parameter: "predictors",
195 expected: format!("{n} rows"),
196 actual: format!("{} rows", predictors.nrows()),
197 });
198 }
199 if argvals.len() != m {
200 return Err(FdarError::InvalidDimension {
201 parameter: "argvals",
202 expected: format!("{m}"),
203 actual: format!("{}", argvals.len()),
204 });
205 }
206 if config.step_size == 0 {
207 return Err(FdarError::InvalidParameter {
208 parameter: "step_size",
209 message: "step_size must be at least 1".to_string(),
210 });
211 }
212 if config.window_size < 3 {
213 return Err(FdarError::InvalidParameter {
214 parameter: "window_size",
215 message: format!("window_size must be >= 3, got {}", config.window_size),
216 });
217 }
218 if config.window_size > n {
219 return Err(FdarError::InvalidParameter {
220 parameter: "window_size",
221 message: format!(
222 "window_size ({}) exceeds data size ({n})",
223 config.window_size
224 ),
225 });
226 }
227
228 // Early check: ensure enough windows before expensive FOSR fitting.
229 let expected_n_windows = if n >= config.window_size {
230 (n - config.window_size) / config.step_size + 1
231 } else {
232 0
233 };
234 if expected_n_windows < 4 {
235 return Err(FdarError::InvalidDimension {
236 parameter: "data",
237 expected: "enough data for at least 4 windows".to_string(),
238 actual: format!(
239 "{expected_n_windows} windows (n={n}, window_size={}, step_size={})",
240 config.window_size, config.step_size
241 ),
242 });
243 }
244
245 // Fit reference FOSR on full data
246 let reference_fosr = fosr(y_curves, predictors, config.fosr_lambda)?;
247
248 // Rolling windows: extract per-window FOSR betas
249 let beta_vecs = rolling_betas(y_curves, predictors, config)?;
250 let n_windows = beta_vecs.nrows();
251
252 if n_windows < 4 {
253 return Err(FdarError::InvalidDimension {
254 parameter: "data",
255 expected: "enough data for at least 4 windows".to_string(),
256 actual: format!("{n_windows} windows"),
257 });
258 }
259
260 // FPCA on vectorized betas
261 let ncomp = config.ncomp.min(n_windows - 1).min(beta_vecs.ncols());
262 let beta_fpca = fdata_to_pc_1d(&beta_vecs, ncomp)?;
263 let actual_ncomp = beta_fpca.scores.ncols();
264
265 // Eigenvalues
266 let eigenvalues: Vec<f64> = beta_fpca
267 .singular_values
268 .iter()
269 .take(actual_ncomp)
270 .map(|&sv| sv * sv / (n_windows as f64 - 1.0))
271 .collect();
272
273 // Compute Phase I T² for autocorrelation diagnostic
274 let phase1_t2 = hotelling_t2(&beta_fpca.scores, &eigenvalues)?;
275 // Lag-1 autocorrelation using unbiased sample variance (n-1 denominator).
276 // Both variance and covariance use the (n-1) denominator. The ratio
277 // ρ₁ = cov/var is invariant to this choice, but (n-1) gives the unbiased
278 // sample variance.
279 let lag1_autocorrelation = if phase1_t2.len() > 2 {
280 let n_t2 = phase1_t2.len();
281 let mean_t2 = phase1_t2.iter().sum::<f64>() / n_t2 as f64;
282 let var_t2: f64 = phase1_t2
283 .iter()
284 .map(|&v| (v - mean_t2).powi(2))
285 .sum::<f64>()
286 / (n_t2 - 1) as f64;
287 if var_t2 > 0.0 {
288 let cov1: f64 = (0..n_t2 - 1)
289 .map(|i| (phase1_t2[i] - mean_t2) * (phase1_t2[i + 1] - mean_t2))
290 .sum::<f64>()
291 / (n_t2 - 1) as f64;
292 (cov1 / var_t2).clamp(-1.0, 1.0)
293 } else {
294 0.0
295 }
296 } else {
297 0.0
298 };
299
300 // Bartlett (1946) effective sample size: n_eff = n / (1 + 2|ρ₁|).
301 // See Bartlett, M.S. (1946), Section 3.
302 // The Bartlett (1946) formula n_eff = n/(1 + 2ρ₁) is derived for
303 // AR(1) processes and provides a first-order correction for general
304 // stationary processes. For rolling-window statistics with overlap
305 // fraction f = 1 - step_size/window_size, the lag-1 autocorrelation
306 // is approximately f, so n_eff ≈ n/(1 + 2f). This is conservative
307 // (underestimates n_eff) for non-AR(1) dependence structures.
308 let effective_n_windows = if lag1_autocorrelation.abs() > 0.01 {
309 // Use absolute value: both positive (overlapping windows) and negative
310 // (anti-correlated) autocorrelation reduce the effective degrees of freedom.
311 let bartlett_factor = (1.0 + 2.0 * lag1_autocorrelation.abs()).max(1.0);
312 (n_windows as f64 / bartlett_factor).max(2.0)
313 } else {
314 n_windows as f64
315 };
316
317 // Control limit
318 let t2_limit = t2_control_limit(actual_ncomp, config.alpha)?;
319
320 Ok(ProfileChart {
321 reference_fosr,
322 beta_fpca,
323 eigenvalues,
324 t2_limit,
325 lag1_autocorrelation,
326 effective_n_windows,
327 config: config.clone(),
328 })
329}
330
331/// Monitor new data against a Phase I profile chart.
332///
333/// 1. Fits FOSR per rolling window on new data
334/// 2. Vectorizes β(t) and projects onto Phase I beta-FPCA
335/// 3. Computes T-squared
336///
337/// # Arguments
338/// * `chart` - Phase I profile chart
339/// * `new_y` - New response functional data (n × m)
340/// * `new_predictors` - New scalar predictors (n × p)
341/// * `argvals` - Grid points (length m)
342/// * `config` - Profile monitoring configuration
343///
344/// # Errors
345///
346/// Returns errors from FOSR or projection.
347#[must_use = "monitoring result should not be discarded"]
348pub fn profile_monitor(
349 chart: &ProfileChart,
350 new_y: &FdMatrix,
351 new_predictors: &FdMatrix,
352 _argvals: &[f64],
353 config: &ProfileMonitorConfig,
354) -> Result<ProfileMonitorResult, FdarError> {
355 if config.step_size == 0 {
356 return Err(FdarError::InvalidParameter {
357 parameter: "step_size",
358 message: "step_size must be at least 1".to_string(),
359 });
360 }
361 let n = new_y.nrows();
362 if new_predictors.nrows() != n {
363 return Err(FdarError::InvalidDimension {
364 parameter: "new_predictors",
365 expected: format!("{n} rows"),
366 actual: format!("{} rows", new_predictors.nrows()),
367 });
368 }
369 // Validate that new_y grid size matches the reference FOSR grid (m)
370 let expected_m = chart.reference_fosr.beta.ncols();
371 if new_y.ncols() != expected_m {
372 return Err(FdarError::InvalidDimension {
373 parameter: "new_y",
374 expected: format!("{expected_m} columns (grid points)"),
375 actual: format!("{} columns", new_y.ncols()),
376 });
377 }
378
379 // Rolling windows on new data
380 let beta_vecs = rolling_betas(new_y, new_predictors, config)?;
381
382 // Project onto Phase I FPCA
383 let beta_scores = chart.beta_fpca.project(&beta_vecs)?;
384
385 // T-squared
386 let t2 = hotelling_t2(&beta_scores, &chart.eigenvalues)?;
387
388 // Alarms
389 let t2_alarm: Vec<bool> = t2.iter().map(|&v| v > chart.t2_limit.ucl).collect();
390
391 Ok(ProfileMonitorResult {
392 betas: beta_vecs,
393 t2,
394 t2_alarm,
395 beta_scores,
396 })
397}
398
399/// Extract vectorized FOSR betas from rolling windows.
400///
401/// Each window must have sufficient rank for FOSR fitting. If a window fails
402/// (e.g., due to collinear predictors), the function returns an error.
403/// Ensure `window_size` is large enough relative to `p` (number of predictors).
404fn rolling_betas(
405 y_curves: &FdMatrix,
406 predictors: &FdMatrix,
407 config: &ProfileMonitorConfig,
408) -> Result<FdMatrix, FdarError> {
409 let n = y_curves.nrows();
410 let m = y_curves.ncols();
411 let p = predictors.ncols();
412
413 if config.window_size < p + 2 {
414 return Err(FdarError::InvalidParameter {
415 parameter: "window_size",
416 message: format!(
417 "window_size ({}) must be >= p + 2 = {} for FOSR fitting",
418 config.window_size,
419 p + 2
420 ),
421 });
422 }
423
424 let mut windows = Vec::new();
425 let mut start = 0;
426 while start + config.window_size <= n {
427 windows.push(start);
428 start += config.step_size;
429 }
430
431 if windows.is_empty() {
432 return Err(FdarError::InvalidDimension {
433 parameter: "data",
434 expected: format!(
435 "at least {} observations for one window",
436 config.window_size
437 ),
438 actual: format!("{n} observations"),
439 });
440 }
441
442 // Beta coefficients are vectorized in row-major order:
443 // β₁(t₁),...,β₁(t_m),β₂(t₁),...,β₂(t_m).
444 // Each row of beta_mat represents one window's complete coefficient function.
445 let beta_len = p * m;
446 let n_windows = windows.len();
447 let mut beta_mat = FdMatrix::zeros(n_windows, beta_len);
448
449 for (w_idx, &win_start) in windows.iter().enumerate() {
450 // Extract window data
451 let mut y_window = FdMatrix::zeros(config.window_size, m);
452 let mut pred_window = FdMatrix::zeros(config.window_size, p);
453 for i in 0..config.window_size {
454 for j in 0..m {
455 y_window[(i, j)] = y_curves[(win_start + i, j)];
456 }
457 for j in 0..p {
458 pred_window[(i, j)] = predictors[(win_start + i, j)];
459 }
460 }
461
462 // Fit FOSR
463 let fosr_result = fosr(&y_window, &pred_window, config.fosr_lambda)?;
464
465 // Vectorize beta (p × m) into row of beta_mat
466 // fosr_result.beta is p × m where row j = βⱼ(t)
467 for j in 0..p {
468 for t in 0..m {
469 beta_mat[(w_idx, j * m + t)] = fosr_result.beta[(j, t)];
470 }
471 }
472 }
473
474 Ok(beta_mat)
475}