fdars-core 0.13.0

Functional Data Analysis algorithms in Rust
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
//! Profile monitoring for functional data.
//!
//! Monitors the relationship between scalar predictors and functional
//! responses over time using Function-on-Scalar Regression (FOSR).
//! Detects changes in the coefficient functions beta(t) via FPCA and T-squared.
//!
//! # Mathematical framework
//!
//! The functional response model is y_i(t) = x_i^T beta(t) + epsilon_i(t),
//! where beta(t) = [beta_1(t), ..., beta_p(t)]^T are coefficient functions.
//! Rolling FOSR estimates beta(t) within each window, producing a sequence
//! of vectorized coefficient functions. FPCA extracts the dominant modes of
//! variation in the beta(t) sequence, and T-squared monitors for shifts
//! in the score distribution.
//!
//! Beta coefficients from rolling windows are vectorized column-major:
//! [beta_1(t_1), ..., beta_1(t_m), beta_2(t_1), ..., beta_2(t_m), ...]
//! to form the FPCA input matrix. This preserves the functional structure
//! within each predictor.
//!
//! **Note on overlapping windows:** When `step_size < window_size`, consecutive
//! windows share observations, inducing serial correlation in the beta(t) estimates.
//! The `effective_n_windows` field in [`ProfileChart`] provides a Bartlett-style
//! correction for the effective degrees of freedom. Specifically, for overlapping
//! windows with step_size < window_size, the effective number of independent
//! windows is reduced. The Bartlett correction n_eff = n_windows / (1 + 2|rho_1|)
//! accounts for AR(1) autocorrelation rho_1 in the windowed statistics, where
//! rho_1 is estimated from the lag-1 autocorrelation of the T-squared sequence
//! (Bartlett, 1946, section 3, pp. 31--33).
//!
//! # References
//!
//! - Bartlett, M.S. (1946). On the theoretical specification of sampling
//!   properties of autocorrelated time series. *Journal of the Royal
//!   Statistical Society B*, 8(1), 27--41, section 3, pp. 31--33.
//! - Ledolter, J. & Swersey, A.J. (2007). *Testing 1-2-3: Experimental
//!   design with applications in marketing and service operations*.
//!   Stanford University Press, Ch. 6 (profile monitoring).

use crate::error::FdarError;
use crate::function_on_scalar::{fosr, FosrResult};
use crate::matrix::FdMatrix;
use crate::regression::{fdata_to_pc_1d, FpcaResult};
use crate::spm::control::{t2_control_limit, ControlLimit};
use crate::spm::stats::hotelling_t2;

/// Configuration for profile monitoring.
///
/// # Parameter guidance
///
/// - `window_size`: Must be >= p + 2 where p is the number of predictors.
///   Larger windows give more stable beta(t) estimates but reduce temporal resolution.
///   Typical: 20--50 for slowly varying processes. Recommended `window_size` is
///   5p to 10p where p is the number of predictors, to ensure stable FOSR
///   estimation within each window (the design matrix X^T X has condition number
///   roughly proportional to window_size / p).
/// - `step_size`: Controls window overlap. step_size = window_size gives no overlap
///   (independent windows); step_size = 1 gives maximum overlap (smoothest tracking
///   but highest autocorrelation). Typical: window_size/2 or window_size/4.
#[derive(Debug, Clone, PartialEq)]
pub struct ProfileMonitorConfig {
    /// FOSR smoothing parameter (default 1e-4).
    pub fosr_lambda: f64,
    /// Number of principal components for beta-function FPCA (default 3).
    /// Typically 2--5 suffices since coefficient functions are smoother than
    /// raw data. Use `select_ncomp()` on the beta eigenvalues for data-driven
    /// selection.
    pub ncomp: usize,
    /// Significance level (default 0.05).
    pub alpha: f64,
    /// Window size for rolling FOSR (default 20).
    pub window_size: usize,
    /// Step size between windows (default 1).
    pub step_size: usize,
}

impl Default for ProfileMonitorConfig {
    fn default() -> Self {
        Self {
            fosr_lambda: 1e-4,
            ncomp: 3,
            alpha: 0.05,
            window_size: 20,
            step_size: 1,
        }
    }
}

/// Phase I profile monitoring chart.
///
/// When `effective_n_windows` is much smaller than the actual number of
/// windows, the chi-squared UCL may need adjustment. A practical approach:
/// multiply the UCL by `effective_n_windows / n_windows` to approximate a
/// Bonferroni-like correction.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct ProfileChart {
    /// FOSR result from the full reference data.
    pub reference_fosr: FosrResult,
    /// FPCA of the rolling beta coefficient functions.
    pub beta_fpca: FpcaResult,
    /// Eigenvalues: sv² / (n_windows - 1).
    pub eigenvalues: Vec<f64>,
    /// T-squared control limit for beta monitoring.
    pub t2_limit: ControlLimit,
    /// Lag-1 autocorrelation of the Phase I T-squared statistics from rolling windows.
    /// High values (> 0.5) indicate serial correlation from window overlap.
    ///
    /// Computed from the Phase I T-squared statistic sequence. Values |rho_1| > 0.3
    /// indicate substantial serial correlation; `effective_n_windows` will be
    /// notably reduced. The estimator uses the unbiased sample variance (n-1
    /// denominator) for both variance and covariance terms.
    pub lag1_autocorrelation: f64,
    /// Effective number of independent windows (Bartlett correction for overlap).
    /// When step_size < window_size, consecutive windows overlap and are
    /// correlated, reducing the effective degrees of freedom.
    ///
    /// Computed as n_eff = n_windows / (1 + 2|rho_1|), which is the Bartlett
    /// (1946, section 3) formula for AR(1) processes. For overlap fraction
    /// f = 1 - step_size/window_size, the lag-1 autocorrelation is approximately
    /// f, so n_eff ~ n_windows / (1 + 2f). This is conservative (underestimates
    /// n_eff) for non-AR(1) dependence structures.
    ///
    /// Use this to assess whether the chi-squared UCL is reliable. When
    /// `effective_n_windows` < 20, consider widening the control limit or
    /// using bootstrap limits instead.
    pub effective_n_windows: f64,
    /// Configuration used.
    pub config: ProfileMonitorConfig,
}

/// Result of Phase II profile monitoring.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct ProfileMonitorResult {
    /// Per-window beta coefficient matrices (vectorized).
    pub betas: FdMatrix,
    /// T-squared values for each window.
    pub t2: Vec<f64>,
    /// T-squared alarm flags.
    pub t2_alarm: Vec<bool>,
    /// FPC scores for the beta functions.
    pub beta_scores: FdMatrix,
}

/// Build a Phase I profile monitoring chart.
///
/// 1. Fits FOSR on the full training data to get reference β(t)
/// 2. Rolls windows over the data, fitting FOSR per window
/// 3. Vectorizes the β(t) from each window
/// 4. Runs FPCA on the vectorized betas
/// 5. Computes T-squared control limits
///
/// When `step_size > window_size`, windows do not overlap and some observations
/// fall between windows (not monitored).
///
/// # Arguments
/// * `y_curves` - Response functional data (n × m)
/// * `predictors` - Scalar predictors (n × p)
/// * `argvals` - Grid points (length m)
/// * `config` - Profile monitoring configuration
///
/// # Errors
///
/// Returns errors from FOSR or FPCA computation.
///
/// # Example
/// ```no_run
/// use fdars_core::matrix::FdMatrix;
/// use fdars_core::spm::profile::{profile_phase1, ProfileMonitorConfig};
/// let n = 50;
/// let m = 10;
/// let y = FdMatrix::from_column_major(
///     (0..n*m).map(|i| (i as f64 * 0.1).sin()).collect(), n, m
/// ).unwrap();
/// let pred = FdMatrix::from_column_major(
///     (0..n).map(|i| i as f64 / n as f64).collect(), n, 1
/// ).unwrap();
/// let argvals: Vec<f64> = (0..m).map(|i| i as f64 / (m-1) as f64).collect();
/// let config = ProfileMonitorConfig { window_size: 10, step_size: 5, ncomp: 2, ..ProfileMonitorConfig::default() };
/// let chart = profile_phase1(&y, &pred, &argvals, &config).unwrap();
/// assert!(chart.eigenvalues.len() >= 1);
/// ```
#[must_use = "expensive computation whose result should not be discarded"]
pub fn profile_phase1(
    y_curves: &FdMatrix,
    predictors: &FdMatrix,
    argvals: &[f64],
    config: &ProfileMonitorConfig,
) -> Result<ProfileChart, FdarError> {
    let (n, m) = y_curves.shape();
    if predictors.nrows() != n {
        return Err(FdarError::InvalidDimension {
            parameter: "predictors",
            expected: format!("{n} rows"),
            actual: format!("{} rows", predictors.nrows()),
        });
    }
    if argvals.len() != m {
        return Err(FdarError::InvalidDimension {
            parameter: "argvals",
            expected: format!("{m}"),
            actual: format!("{}", argvals.len()),
        });
    }
    if config.step_size == 0 {
        return Err(FdarError::InvalidParameter {
            parameter: "step_size",
            message: "step_size must be at least 1".to_string(),
        });
    }
    if config.window_size < 3 {
        return Err(FdarError::InvalidParameter {
            parameter: "window_size",
            message: format!("window_size must be >= 3, got {}", config.window_size),
        });
    }
    if config.window_size > n {
        return Err(FdarError::InvalidParameter {
            parameter: "window_size",
            message: format!(
                "window_size ({}) exceeds data size ({n})",
                config.window_size
            ),
        });
    }

    // Early check: ensure enough windows before expensive FOSR fitting.
    let expected_n_windows = if n >= config.window_size {
        (n - config.window_size) / config.step_size + 1
    } else {
        0
    };
    if expected_n_windows < 4 {
        return Err(FdarError::InvalidDimension {
            parameter: "data",
            expected: "enough data for at least 4 windows".to_string(),
            actual: format!(
                "{expected_n_windows} windows (n={n}, window_size={}, step_size={})",
                config.window_size, config.step_size
            ),
        });
    }

    // Fit reference FOSR on full data
    let reference_fosr = fosr(y_curves, predictors, config.fosr_lambda)?;

    // Rolling windows: extract per-window FOSR betas
    let beta_vecs = rolling_betas(y_curves, predictors, config)?;
    let n_windows = beta_vecs.nrows();

    if n_windows < 4 {
        return Err(FdarError::InvalidDimension {
            parameter: "data",
            expected: "enough data for at least 4 windows".to_string(),
            actual: format!("{n_windows} windows"),
        });
    }

    // FPCA on vectorized betas
    let ncomp = config.ncomp.min(n_windows - 1).min(beta_vecs.ncols());
    let beta_m = beta_vecs.ncols();
    let beta_argvals: Vec<f64> = (0..beta_m)
        .map(|j| j as f64 / (beta_m - 1).max(1) as f64)
        .collect();
    let beta_fpca = fdata_to_pc_1d(&beta_vecs, ncomp, &beta_argvals)?;
    let actual_ncomp = beta_fpca.scores.ncols();

    // Eigenvalues
    let eigenvalues: Vec<f64> = beta_fpca
        .singular_values
        .iter()
        .take(actual_ncomp)
        .map(|&sv| sv * sv / (n_windows as f64 - 1.0))
        .collect();

    // Compute Phase I T² for autocorrelation diagnostic
    let phase1_t2 = hotelling_t2(&beta_fpca.scores, &eigenvalues)?;
    // Lag-1 autocorrelation using unbiased sample variance (n-1 denominator).
    // Both variance and covariance use the (n-1) denominator. The ratio
    // ρ₁ = cov/var is invariant to this choice, but (n-1) gives the unbiased
    // sample variance.
    let lag1_autocorrelation = if phase1_t2.len() > 2 {
        let n_t2 = phase1_t2.len();
        let mean_t2 = phase1_t2.iter().sum::<f64>() / n_t2 as f64;
        let var_t2: f64 = phase1_t2
            .iter()
            .map(|&v| (v - mean_t2).powi(2))
            .sum::<f64>()
            / (n_t2 - 1) as f64;
        if var_t2 > 0.0 {
            let cov1: f64 = (0..n_t2 - 1)
                .map(|i| (phase1_t2[i] - mean_t2) * (phase1_t2[i + 1] - mean_t2))
                .sum::<f64>()
                / (n_t2 - 1) as f64;
            (cov1 / var_t2).clamp(-1.0, 1.0)
        } else {
            0.0
        }
    } else {
        0.0
    };

    // Bartlett (1946) effective sample size: n_eff = n / (1 + 2|ρ₁|).
    // See Bartlett, M.S. (1946), Section 3.
    // The Bartlett (1946) formula n_eff = n/(1 + 2ρ₁) is derived for
    // AR(1) processes and provides a first-order correction for general
    // stationary processes. For rolling-window statistics with overlap
    // fraction f = 1 - step_size/window_size, the lag-1 autocorrelation
    // is approximately f, so n_eff ≈ n/(1 + 2f). This is conservative
    // (underestimates n_eff) for non-AR(1) dependence structures.
    let effective_n_windows = if lag1_autocorrelation.abs() > 0.01 {
        // Use absolute value: both positive (overlapping windows) and negative
        // (anti-correlated) autocorrelation reduce the effective degrees of freedom.
        let bartlett_factor = (1.0 + 2.0 * lag1_autocorrelation.abs()).max(1.0);
        (n_windows as f64 / bartlett_factor).max(2.0)
    } else {
        n_windows as f64
    };

    // Control limit
    let t2_limit = t2_control_limit(actual_ncomp, config.alpha)?;

    Ok(ProfileChart {
        reference_fosr,
        beta_fpca,
        eigenvalues,
        t2_limit,
        lag1_autocorrelation,
        effective_n_windows,
        config: config.clone(),
    })
}

/// Monitor new data against a Phase I profile chart.
///
/// 1. Fits FOSR per rolling window on new data
/// 2. Vectorizes β(t) and projects onto Phase I beta-FPCA
/// 3. Computes T-squared
///
/// # Arguments
/// * `chart` - Phase I profile chart
/// * `new_y` - New response functional data (n × m)
/// * `new_predictors` - New scalar predictors (n × p)
/// * `argvals` - Grid points (length m)
/// * `config` - Profile monitoring configuration
///
/// # Errors
///
/// Returns errors from FOSR or projection.
#[must_use = "monitoring result should not be discarded"]
pub fn profile_monitor(
    chart: &ProfileChart,
    new_y: &FdMatrix,
    new_predictors: &FdMatrix,
    _argvals: &[f64],
    config: &ProfileMonitorConfig,
) -> Result<ProfileMonitorResult, FdarError> {
    if config.step_size == 0 {
        return Err(FdarError::InvalidParameter {
            parameter: "step_size",
            message: "step_size must be at least 1".to_string(),
        });
    }
    let n = new_y.nrows();
    if new_predictors.nrows() != n {
        return Err(FdarError::InvalidDimension {
            parameter: "new_predictors",
            expected: format!("{n} rows"),
            actual: format!("{} rows", new_predictors.nrows()),
        });
    }
    // Validate that new_y grid size matches the reference FOSR grid (m)
    let expected_m = chart.reference_fosr.beta.ncols();
    if new_y.ncols() != expected_m {
        return Err(FdarError::InvalidDimension {
            parameter: "new_y",
            expected: format!("{expected_m} columns (grid points)"),
            actual: format!("{} columns", new_y.ncols()),
        });
    }

    // Rolling windows on new data
    let beta_vecs = rolling_betas(new_y, new_predictors, config)?;

    // Project onto Phase I FPCA
    let beta_scores = chart.beta_fpca.project(&beta_vecs)?;

    // T-squared
    let t2 = hotelling_t2(&beta_scores, &chart.eigenvalues)?;

    // Alarms
    let t2_alarm: Vec<bool> = t2.iter().map(|&v| v > chart.t2_limit.ucl).collect();

    Ok(ProfileMonitorResult {
        betas: beta_vecs,
        t2,
        t2_alarm,
        beta_scores,
    })
}

/// Extract vectorized FOSR betas from rolling windows.
///
/// Each window must have sufficient rank for FOSR fitting. If a window fails
/// (e.g., due to collinear predictors), the function returns an error.
/// Ensure `window_size` is large enough relative to `p` (number of predictors).
fn rolling_betas(
    y_curves: &FdMatrix,
    predictors: &FdMatrix,
    config: &ProfileMonitorConfig,
) -> Result<FdMatrix, FdarError> {
    let n = y_curves.nrows();
    let m = y_curves.ncols();
    let p = predictors.ncols();

    if config.window_size < p + 2 {
        return Err(FdarError::InvalidParameter {
            parameter: "window_size",
            message: format!(
                "window_size ({}) must be >= p + 2 = {} for FOSR fitting",
                config.window_size,
                p + 2
            ),
        });
    }

    let mut windows = Vec::new();
    let mut start = 0;
    while start + config.window_size <= n {
        windows.push(start);
        start += config.step_size;
    }

    if windows.is_empty() {
        return Err(FdarError::InvalidDimension {
            parameter: "data",
            expected: format!(
                "at least {} observations for one window",
                config.window_size
            ),
            actual: format!("{n} observations"),
        });
    }

    // Beta coefficients are vectorized in row-major order:
    // β₁(t₁),...,β₁(t_m),β₂(t₁),...,β₂(t_m).
    // Each row of beta_mat represents one window's complete coefficient function.
    let beta_len = p * m;
    let n_windows = windows.len();
    let mut beta_mat = FdMatrix::zeros(n_windows, beta_len);

    for (w_idx, &win_start) in windows.iter().enumerate() {
        // Extract window data
        let mut y_window = FdMatrix::zeros(config.window_size, m);
        let mut pred_window = FdMatrix::zeros(config.window_size, p);
        for i in 0..config.window_size {
            for j in 0..m {
                y_window[(i, j)] = y_curves[(win_start + i, j)];
            }
            for j in 0..p {
                pred_window[(i, j)] = predictors[(win_start + i, j)];
            }
        }

        // Fit FOSR
        let fosr_result = fosr(&y_window, &pred_window, config.fosr_lambda)?;

        // Vectorize beta (p × m) into row of beta_mat
        // fosr_result.beta is p × m where row j = βⱼ(t)
        for j in 0..p {
            for t in 0..m {
                beta_mat[(w_idx, j * m + t)] = fosr_result.beta[(j, t)];
            }
        }
    }

    Ok(beta_mat)
}