fdars_core/spm/profile.rs
1//! Profile monitoring for functional data.
2//!
3//! Monitors the relationship between scalar predictors and functional
4//! responses over time using Function-on-Scalar Regression (FOSR).
5//! Detects changes in the coefficient functions beta(t) via FPCA and T-squared.
6//!
7//! # Mathematical framework
8//!
9//! The functional response model is y_i(t) = x_i^T beta(t) + epsilon_i(t),
10//! where beta(t) = [beta_1(t), ..., beta_p(t)]^T are coefficient functions.
11//! Rolling FOSR estimates beta(t) within each window, producing a sequence
12//! of vectorized coefficient functions. FPCA extracts the dominant modes of
13//! variation in the beta(t) sequence, and T-squared monitors for shifts
14//! in the score distribution.
15//!
16//! Beta coefficients from rolling windows are vectorized column-major:
17//! [beta_1(t_1), ..., beta_1(t_m), beta_2(t_1), ..., beta_2(t_m), ...]
18//! to form the FPCA input matrix. This preserves the functional structure
19//! within each predictor.
20//!
21//! **Note on overlapping windows:** When `step_size < window_size`, consecutive
22//! windows share observations, inducing serial correlation in the beta(t) estimates.
23//! The `effective_n_windows` field in [`ProfileChart`] provides a Bartlett-style
24//! correction for the effective degrees of freedom. Specifically, for overlapping
25//! windows with step_size < window_size, the effective number of independent
26//! windows is reduced. The Bartlett correction n_eff = n_windows / (1 + 2|rho_1|)
27//! accounts for AR(1) autocorrelation rho_1 in the windowed statistics, where
28//! rho_1 is estimated from the lag-1 autocorrelation of the T-squared sequence
29//! (Bartlett, 1946, section 3, pp. 31--33).
30//!
31//! # References
32//!
33//! - Bartlett, M.S. (1946). On the theoretical specification of sampling
34//! properties of autocorrelated time series. *Journal of the Royal
35//! Statistical Society B*, 8(1), 27--41, section 3, pp. 31--33.
36//! - Ledolter, J. & Swersey, A.J. (2007). *Testing 1-2-3: Experimental
37//! design with applications in marketing and service operations*.
38//! Stanford University Press, Ch. 6 (profile monitoring).
39
40use crate::error::FdarError;
41use crate::function_on_scalar::{fosr, FosrResult};
42use crate::matrix::FdMatrix;
43use crate::regression::{fdata_to_pc_1d, FpcaResult};
44use crate::spm::control::{t2_control_limit, ControlLimit};
45use crate::spm::stats::hotelling_t2;
46
47/// Configuration for profile monitoring.
48///
49/// # Parameter guidance
50///
51/// - `window_size`: Must be >= p + 2 where p is the number of predictors.
52/// Larger windows give more stable beta(t) estimates but reduce temporal resolution.
53/// Typical: 20--50 for slowly varying processes. Recommended `window_size` is
54/// 5p to 10p where p is the number of predictors, to ensure stable FOSR
55/// estimation within each window (the design matrix X^T X has condition number
56/// roughly proportional to window_size / p).
57/// - `step_size`: Controls window overlap. step_size = window_size gives no overlap
58/// (independent windows); step_size = 1 gives maximum overlap (smoothest tracking
59/// but highest autocorrelation). Typical: window_size/2 or window_size/4.
60#[derive(Debug, Clone, PartialEq)]
61pub struct ProfileMonitorConfig {
62 /// FOSR smoothing parameter (default 1e-4).
63 pub fosr_lambda: f64,
64 /// Number of principal components for beta-function FPCA (default 3).
65 /// Typically 2--5 suffices since coefficient functions are smoother than
66 /// raw data. Use `select_ncomp()` on the beta eigenvalues for data-driven
67 /// selection.
68 pub ncomp: usize,
69 /// Significance level (default 0.05).
70 pub alpha: f64,
71 /// Window size for rolling FOSR (default 20).
72 pub window_size: usize,
73 /// Step size between windows (default 1).
74 pub step_size: usize,
75}
76
77impl Default for ProfileMonitorConfig {
78 fn default() -> Self {
79 Self {
80 fosr_lambda: 1e-4,
81 ncomp: 3,
82 alpha: 0.05,
83 window_size: 20,
84 step_size: 1,
85 }
86 }
87}
88
89/// Phase I profile monitoring chart.
90///
91/// When `effective_n_windows` is much smaller than the actual number of
92/// windows, the chi-squared UCL may need adjustment. A practical approach:
93/// multiply the UCL by `effective_n_windows / n_windows` to approximate a
94/// Bonferroni-like correction.
95#[derive(Debug, Clone, PartialEq)]
96#[non_exhaustive]
97pub struct ProfileChart {
98 /// FOSR result from the full reference data.
99 pub reference_fosr: FosrResult,
100 /// FPCA of the rolling beta coefficient functions.
101 pub beta_fpca: FpcaResult,
102 /// Eigenvalues: sv² / (n_windows - 1).
103 pub eigenvalues: Vec<f64>,
104 /// T-squared control limit for beta monitoring.
105 pub t2_limit: ControlLimit,
106 /// Lag-1 autocorrelation of the Phase I T-squared statistics from rolling windows.
107 /// High values (> 0.5) indicate serial correlation from window overlap.
108 ///
109 /// Computed from the Phase I T-squared statistic sequence. Values |rho_1| > 0.3
110 /// indicate substantial serial correlation; `effective_n_windows` will be
111 /// notably reduced. The estimator uses the unbiased sample variance (n-1
112 /// denominator) for both variance and covariance terms.
113 pub lag1_autocorrelation: f64,
114 /// Effective number of independent windows (Bartlett correction for overlap).
115 /// When step_size < window_size, consecutive windows overlap and are
116 /// correlated, reducing the effective degrees of freedom.
117 ///
118 /// Computed as n_eff = n_windows / (1 + 2|rho_1|), which is the Bartlett
119 /// (1946, section 3) formula for AR(1) processes. For overlap fraction
120 /// f = 1 - step_size/window_size, the lag-1 autocorrelation is approximately
121 /// f, so n_eff ~ n_windows / (1 + 2f). This is conservative (underestimates
122 /// n_eff) for non-AR(1) dependence structures.
123 ///
124 /// Use this to assess whether the chi-squared UCL is reliable. When
125 /// `effective_n_windows` < 20, consider widening the control limit or
126 /// using bootstrap limits instead.
127 pub effective_n_windows: f64,
128 /// Configuration used.
129 pub config: ProfileMonitorConfig,
130}
131
132/// Result of Phase II profile monitoring.
133#[derive(Debug, Clone, PartialEq)]
134#[non_exhaustive]
135pub struct ProfileMonitorResult {
136 /// Per-window beta coefficient matrices (vectorized).
137 pub betas: FdMatrix,
138 /// T-squared values for each window.
139 pub t2: Vec<f64>,
140 /// T-squared alarm flags.
141 pub t2_alarm: Vec<bool>,
142 /// FPC scores for the beta functions.
143 pub beta_scores: FdMatrix,
144}
145
146/// Build a Phase I profile monitoring chart.
147///
148/// 1. Fits FOSR on the full training data to get reference β(t)
149/// 2. Rolls windows over the data, fitting FOSR per window
150/// 3. Vectorizes the β(t) from each window
151/// 4. Runs FPCA on the vectorized betas
152/// 5. Computes T-squared control limits
153///
154/// When `step_size > window_size`, windows do not overlap and some observations
155/// fall between windows (not monitored).
156///
157/// # Arguments
158/// * `y_curves` - Response functional data (n × m)
159/// * `predictors` - Scalar predictors (n × p)
160/// * `argvals` - Grid points (length m)
161/// * `config` - Profile monitoring configuration
162///
163/// # Errors
164///
165/// Returns errors from FOSR or FPCA computation.
166///
167/// # Example
168/// ```no_run
169/// use fdars_core::matrix::FdMatrix;
170/// use fdars_core::spm::profile::{profile_phase1, ProfileMonitorConfig};
171/// let n = 50;
172/// let m = 10;
173/// let y = FdMatrix::from_column_major(
174/// (0..n*m).map(|i| (i as f64 * 0.1).sin()).collect(), n, m
175/// ).unwrap();
176/// let pred = FdMatrix::from_column_major(
177/// (0..n).map(|i| i as f64 / n as f64).collect(), n, 1
178/// ).unwrap();
179/// let argvals: Vec<f64> = (0..m).map(|i| i as f64 / (m-1) as f64).collect();
180/// let config = ProfileMonitorConfig { window_size: 10, step_size: 5, ncomp: 2, ..ProfileMonitorConfig::default() };
181/// let chart = profile_phase1(&y, &pred, &argvals, &config).unwrap();
182/// assert!(chart.eigenvalues.len() >= 1);
183/// ```
184#[must_use = "expensive computation whose result should not be discarded"]
185pub fn profile_phase1(
186 y_curves: &FdMatrix,
187 predictors: &FdMatrix,
188 argvals: &[f64],
189 config: &ProfileMonitorConfig,
190) -> Result<ProfileChart, FdarError> {
191 let (n, m) = y_curves.shape();
192 if predictors.nrows() != n {
193 return Err(FdarError::InvalidDimension {
194 parameter: "predictors",
195 expected: format!("{n} rows"),
196 actual: format!("{} rows", predictors.nrows()),
197 });
198 }
199 if argvals.len() != m {
200 return Err(FdarError::InvalidDimension {
201 parameter: "argvals",
202 expected: format!("{m}"),
203 actual: format!("{}", argvals.len()),
204 });
205 }
206 if config.step_size == 0 {
207 return Err(FdarError::InvalidParameter {
208 parameter: "step_size",
209 message: "step_size must be at least 1".to_string(),
210 });
211 }
212 if config.window_size < 3 {
213 return Err(FdarError::InvalidParameter {
214 parameter: "window_size",
215 message: format!("window_size must be >= 3, got {}", config.window_size),
216 });
217 }
218 if config.window_size > n {
219 return Err(FdarError::InvalidParameter {
220 parameter: "window_size",
221 message: format!(
222 "window_size ({}) exceeds data size ({n})",
223 config.window_size
224 ),
225 });
226 }
227
228 // Early check: ensure enough windows before expensive FOSR fitting.
229 let expected_n_windows = if n >= config.window_size {
230 (n - config.window_size) / config.step_size + 1
231 } else {
232 0
233 };
234 if expected_n_windows < 4 {
235 return Err(FdarError::InvalidDimension {
236 parameter: "data",
237 expected: "enough data for at least 4 windows".to_string(),
238 actual: format!(
239 "{expected_n_windows} windows (n={n}, window_size={}, step_size={})",
240 config.window_size, config.step_size
241 ),
242 });
243 }
244
245 // Fit reference FOSR on full data
246 let reference_fosr = fosr(y_curves, predictors, config.fosr_lambda)?;
247
248 // Rolling windows: extract per-window FOSR betas
249 let beta_vecs = rolling_betas(y_curves, predictors, config)?;
250 let n_windows = beta_vecs.nrows();
251
252 if n_windows < 4 {
253 return Err(FdarError::InvalidDimension {
254 parameter: "data",
255 expected: "enough data for at least 4 windows".to_string(),
256 actual: format!("{n_windows} windows"),
257 });
258 }
259
260 // FPCA on vectorized betas
261 let ncomp = config.ncomp.min(n_windows - 1).min(beta_vecs.ncols());
262 let beta_m = beta_vecs.ncols();
263 let beta_argvals: Vec<f64> = (0..beta_m)
264 .map(|j| j as f64 / (beta_m - 1).max(1) as f64)
265 .collect();
266 let beta_fpca = fdata_to_pc_1d(&beta_vecs, ncomp, &beta_argvals)?;
267 let actual_ncomp = beta_fpca.scores.ncols();
268
269 // Eigenvalues
270 let eigenvalues: Vec<f64> = beta_fpca
271 .singular_values
272 .iter()
273 .take(actual_ncomp)
274 .map(|&sv| sv * sv / (n_windows as f64 - 1.0))
275 .collect();
276
277 // Compute Phase I T² for autocorrelation diagnostic
278 let phase1_t2 = hotelling_t2(&beta_fpca.scores, &eigenvalues)?;
279 // Lag-1 autocorrelation using unbiased sample variance (n-1 denominator).
280 // Both variance and covariance use the (n-1) denominator. The ratio
281 // ρ₁ = cov/var is invariant to this choice, but (n-1) gives the unbiased
282 // sample variance.
283 let lag1_autocorrelation = if phase1_t2.len() > 2 {
284 let n_t2 = phase1_t2.len();
285 let mean_t2 = phase1_t2.iter().sum::<f64>() / n_t2 as f64;
286 let var_t2: f64 = phase1_t2
287 .iter()
288 .map(|&v| (v - mean_t2).powi(2))
289 .sum::<f64>()
290 / (n_t2 - 1) as f64;
291 if var_t2 > 0.0 {
292 let cov1: f64 = (0..n_t2 - 1)
293 .map(|i| (phase1_t2[i] - mean_t2) * (phase1_t2[i + 1] - mean_t2))
294 .sum::<f64>()
295 / (n_t2 - 1) as f64;
296 (cov1 / var_t2).clamp(-1.0, 1.0)
297 } else {
298 0.0
299 }
300 } else {
301 0.0
302 };
303
304 // Bartlett (1946) effective sample size: n_eff = n / (1 + 2|ρ₁|).
305 // See Bartlett, M.S. (1946), Section 3.
306 // The Bartlett (1946) formula n_eff = n/(1 + 2ρ₁) is derived for
307 // AR(1) processes and provides a first-order correction for general
308 // stationary processes. For rolling-window statistics with overlap
309 // fraction f = 1 - step_size/window_size, the lag-1 autocorrelation
310 // is approximately f, so n_eff ≈ n/(1 + 2f). This is conservative
311 // (underestimates n_eff) for non-AR(1) dependence structures.
312 let effective_n_windows = if lag1_autocorrelation.abs() > 0.01 {
313 // Use absolute value: both positive (overlapping windows) and negative
314 // (anti-correlated) autocorrelation reduce the effective degrees of freedom.
315 let bartlett_factor = (1.0 + 2.0 * lag1_autocorrelation.abs()).max(1.0);
316 (n_windows as f64 / bartlett_factor).max(2.0)
317 } else {
318 n_windows as f64
319 };
320
321 // Control limit
322 let t2_limit = t2_control_limit(actual_ncomp, config.alpha)?;
323
324 Ok(ProfileChart {
325 reference_fosr,
326 beta_fpca,
327 eigenvalues,
328 t2_limit,
329 lag1_autocorrelation,
330 effective_n_windows,
331 config: config.clone(),
332 })
333}
334
335/// Monitor new data against a Phase I profile chart.
336///
337/// 1. Fits FOSR per rolling window on new data
338/// 2. Vectorizes β(t) and projects onto Phase I beta-FPCA
339/// 3. Computes T-squared
340///
341/// # Arguments
342/// * `chart` - Phase I profile chart
343/// * `new_y` - New response functional data (n × m)
344/// * `new_predictors` - New scalar predictors (n × p)
345/// * `argvals` - Grid points (length m)
346/// * `config` - Profile monitoring configuration
347///
348/// # Errors
349///
350/// Returns errors from FOSR or projection.
351#[must_use = "monitoring result should not be discarded"]
352pub fn profile_monitor(
353 chart: &ProfileChart,
354 new_y: &FdMatrix,
355 new_predictors: &FdMatrix,
356 _argvals: &[f64],
357 config: &ProfileMonitorConfig,
358) -> Result<ProfileMonitorResult, FdarError> {
359 if config.step_size == 0 {
360 return Err(FdarError::InvalidParameter {
361 parameter: "step_size",
362 message: "step_size must be at least 1".to_string(),
363 });
364 }
365 let n = new_y.nrows();
366 if new_predictors.nrows() != n {
367 return Err(FdarError::InvalidDimension {
368 parameter: "new_predictors",
369 expected: format!("{n} rows"),
370 actual: format!("{} rows", new_predictors.nrows()),
371 });
372 }
373 // Validate that new_y grid size matches the reference FOSR grid (m)
374 let expected_m = chart.reference_fosr.beta.ncols();
375 if new_y.ncols() != expected_m {
376 return Err(FdarError::InvalidDimension {
377 parameter: "new_y",
378 expected: format!("{expected_m} columns (grid points)"),
379 actual: format!("{} columns", new_y.ncols()),
380 });
381 }
382
383 // Rolling windows on new data
384 let beta_vecs = rolling_betas(new_y, new_predictors, config)?;
385
386 // Project onto Phase I FPCA
387 let beta_scores = chart.beta_fpca.project(&beta_vecs)?;
388
389 // T-squared
390 let t2 = hotelling_t2(&beta_scores, &chart.eigenvalues)?;
391
392 // Alarms
393 let t2_alarm: Vec<bool> = t2.iter().map(|&v| v > chart.t2_limit.ucl).collect();
394
395 Ok(ProfileMonitorResult {
396 betas: beta_vecs,
397 t2,
398 t2_alarm,
399 beta_scores,
400 })
401}
402
403/// Extract vectorized FOSR betas from rolling windows.
404///
405/// Each window must have sufficient rank for FOSR fitting. If a window fails
406/// (e.g., due to collinear predictors), the function returns an error.
407/// Ensure `window_size` is large enough relative to `p` (number of predictors).
408fn rolling_betas(
409 y_curves: &FdMatrix,
410 predictors: &FdMatrix,
411 config: &ProfileMonitorConfig,
412) -> Result<FdMatrix, FdarError> {
413 let n = y_curves.nrows();
414 let m = y_curves.ncols();
415 let p = predictors.ncols();
416
417 if config.window_size < p + 2 {
418 return Err(FdarError::InvalidParameter {
419 parameter: "window_size",
420 message: format!(
421 "window_size ({}) must be >= p + 2 = {} for FOSR fitting",
422 config.window_size,
423 p + 2
424 ),
425 });
426 }
427
428 let mut windows = Vec::new();
429 let mut start = 0;
430 while start + config.window_size <= n {
431 windows.push(start);
432 start += config.step_size;
433 }
434
435 if windows.is_empty() {
436 return Err(FdarError::InvalidDimension {
437 parameter: "data",
438 expected: format!(
439 "at least {} observations for one window",
440 config.window_size
441 ),
442 actual: format!("{n} observations"),
443 });
444 }
445
446 // Beta coefficients are vectorized in row-major order:
447 // β₁(t₁),...,β₁(t_m),β₂(t₁),...,β₂(t_m).
448 // Each row of beta_mat represents one window's complete coefficient function.
449 let beta_len = p * m;
450 let n_windows = windows.len();
451 let mut beta_mat = FdMatrix::zeros(n_windows, beta_len);
452
453 for (w_idx, &win_start) in windows.iter().enumerate() {
454 // Extract window data
455 let mut y_window = FdMatrix::zeros(config.window_size, m);
456 let mut pred_window = FdMatrix::zeros(config.window_size, p);
457 for i in 0..config.window_size {
458 for j in 0..m {
459 y_window[(i, j)] = y_curves[(win_start + i, j)];
460 }
461 for j in 0..p {
462 pred_window[(i, j)] = predictors[(win_start + i, j)];
463 }
464 }
465
466 // Fit FOSR
467 let fosr_result = fosr(&y_window, &pred_window, config.fosr_lambda)?;
468
469 // Vectorize beta (p × m) into row of beta_mat
470 // fosr_result.beta is p × m where row j = βⱼ(t)
471 for j in 0..p {
472 for t in 0..m {
473 beta_mat[(w_idx, j * m + t)] = fosr_result.beta[(j, t)];
474 }
475 }
476 }
477
478 Ok(beta_mat)
479}