fdars-core 0.10.0

Functional Data Analysis algorithms in Rust
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
//! Contribution diagnostics for SPM.
//!
//! When an alarm is triggered, these functions identify which functional
//! variables contribute most to the elevated T-squared or SPE statistic.
//! This aids in root-cause analysis of process faults.
//!
//! # References
//!
//! - Kourti, T. & MacGregor, J.F. (1996). Multivariate SPC methods for
//!   process and product monitoring. *Journal of Quality Technology*, 28(4),
//!   409-428, Section 3, pp. 413–416.
//! - Westerhuis, J.A., Gurden, S.P. & Smilde, A.K. (2000). Generalized
//!   contribution plots in multivariate statistical process monitoring.
//!   *Chemometrics and Intelligent Laboratory Systems*, 51(1), 95-114, Eq. 8, p. 100.

use crate::error::FdarError;
use crate::helpers::simpsons_weights;
use crate::matrix::FdMatrix;

use super::chi_squared::chi2_quantile;
use super::mfpca::MfpcaResult;

/// Compute per-variable T-squared contributions.
///
/// For a multivariate SPM setting, splits the overall T-squared into
/// per-variable contributions by grouping score components by variable.
///
/// Returns an n x p matrix where element (i, v) is the T-squared
/// contribution of variable v for observation i.
///
/// # Decomposition property
///
/// For `t2_contributions_mfpca`: T²_total = sum_v T²_v when using
/// MFPCA-weighted contributions. For the grid-proportional contributions
/// computed here, this holds approximately: the error is bounded by
/// |T²_total - sum_v T²_v| <= T²_total * max_l |w_l^{approx} - w_l^{exact}|
/// where w_l are the per-PC variable weights (Westerhuis et al., 2000, Eq. 8).
///
/// # Arguments
/// * `scores` - Score matrix (n x ncomp)
/// * `eigenvalues` - Eigenvalues (length ncomp)
/// * `grid_sizes` - Number of grid points per variable (length p), used to
///   define blocks of consecutive score components belonging to each variable.
///   The total sum of components assigned must equal ncomp.
///
/// # Note
///
/// For univariate SPM, the contribution is simply the per-component T-squared.
/// For multivariate SPM, components are assigned to variables proportionally
/// to their grid sizes. For more accurate decomposition when variables have
/// heterogeneous eigenfunction energy, use [`t2_contributions_mfpca()`] which
/// computes the actual per-variable eigenfunction norms.
///
/// Non-positive eigenvalues are skipped (their T-squared contribution is
/// undefined). If this occurs, the row sums will be less than the full
/// Hotelling T-squared.
///
/// # Errors
///
/// Returns [`FdarError::InvalidDimension`] if shapes are inconsistent.
#[must_use = "contributions should not be discarded"]
pub fn t2_contributions(
    scores: &FdMatrix,
    eigenvalues: &[f64],
    grid_sizes: &[usize],
) -> Result<FdMatrix, FdarError> {
    let (n, ncomp) = scores.shape();
    if ncomp != eigenvalues.len() {
        return Err(FdarError::InvalidDimension {
            parameter: "eigenvalues",
            expected: format!("{ncomp}"),
            actual: format!("{}", eigenvalues.len()),
        });
    }

    let p = grid_sizes.len();
    if p == 0 {
        return Err(FdarError::InvalidDimension {
            parameter: "grid_sizes",
            expected: "at least 1 variable".to_string(),
            actual: "0 variables".to_string(),
        });
    }

    // Proportional allocation distributes each PC's T² contribution to variables
    // in proportion to grid sizes. This approximates the true decomposition
    // T²_v ≈ Σ_l (score_l² / eigenvalue_l) · ||ψ_l^(v)||² / ||ψ_l||² under
    // the assumption that per-variable eigenfunction norms ||ψ_l^(v)||² are
    // proportional to grid sizes. This holds exactly when eigenfunctions have
    // uniform energy density across the domain. For a more accurate
    // decomposition, use t2_contributions_mfpca() which computes the actual
    // per-variable eigenfunction norms (Westerhuis et al., 2000).
    let total_grid: usize = grid_sizes.iter().sum();

    let mut contrib = FdMatrix::zeros(n, p);

    for i in 0..n {
        for l in 0..ncomp {
            if eigenvalues[l] <= 0.0 {
                continue;
            }
            let comp_contrib = scores[(i, l)] * scores[(i, l)] / eigenvalues[l];

            // Distribute this component's contribution across variables
            // proportionally to grid sizes
            for v in 0..p {
                let weight = grid_sizes[v] as f64 / total_grid as f64;
                contrib[(i, v)] += comp_contrib * weight;
            }
        }
    }

    Ok(contrib)
}

/// Compute per-variable SPE contributions.
///
/// Returns an n x p matrix where element (i, v) is the integrated squared
/// reconstruction error for variable v for observation i.
/// Row sums give the total SPE per observation, which should match
/// `spe_univariate` or `spe_multivariate` (up to numerical precision).
///
/// # Arguments
/// * `standardized_vars` - Per-variable standardized centered data
/// * `reconstructed_vars` - Per-variable reconstructed data
/// * `argvals_list` - Per-variable grid points
///
/// # Errors
///
/// Returns [`FdarError::InvalidDimension`] if array lengths are inconsistent.
#[must_use = "contributions should not be discarded"]
pub fn spe_contributions(
    standardized_vars: &[&FdMatrix],
    reconstructed_vars: &[&FdMatrix],
    argvals_list: &[&[f64]],
) -> Result<FdMatrix, FdarError> {
    let p = standardized_vars.len();
    if reconstructed_vars.len() != p || argvals_list.len() != p {
        return Err(FdarError::InvalidDimension {
            parameter: "variables",
            expected: format!("{p} for all arguments"),
            actual: format!(
                "std={}, recon={}, argvals={}",
                standardized_vars.len(),
                reconstructed_vars.len(),
                argvals_list.len()
            ),
        });
    }

    if p == 0 {
        return Ok(FdMatrix::zeros(0, 0));
    }

    let n = standardized_vars[0].nrows();

    let mut contrib = FdMatrix::zeros(n, p);

    for v in 0..p {
        let std_v = standardized_vars[v];
        let rec_v = reconstructed_vars[v];
        let argvals = argvals_list[v];

        if std_v.nrows() != n || rec_v.nrows() != n {
            return Err(FdarError::InvalidDimension {
                parameter: "variables",
                expected: format!("{n} rows for variable {v}"),
                actual: format!("std={}, recon={}", std_v.nrows(), rec_v.nrows()),
            });
        }

        let m_v = std_v.ncols();
        if rec_v.ncols() != m_v || argvals.len() != m_v {
            return Err(FdarError::InvalidDimension {
                parameter: "argvals_list",
                expected: format!("{m_v} for variable {v}"),
                actual: format!(
                    "recon_cols={}, argvals_len={}",
                    rec_v.ncols(),
                    argvals.len()
                ),
            });
        }

        let weights = simpsons_weights(argvals);

        for i in 0..n {
            let mut var_spe = 0.0;
            for j in 0..m_v {
                let diff = std_v[(i, j)] - rec_v[(i, j)];
                var_spe += diff * diff * weights[j];
            }
            contrib[(i, v)] = var_spe;
        }
    }

    Ok(contrib)
}

/// Compute per-PC T-squared contributions.
///
/// Returns an n × ncomp matrix where element (i, l) is the contribution
/// of principal component l to the T-squared statistic for observation i.
///
/// This is the exact decomposition: sum_l contrib(i, l) = T²(i) with
/// equality. Each component follows a chi²(1) distribution under
/// in-control conditions (Kourti & MacGregor, 1996, Section 3, p. 414).
///
/// # Arguments
/// * `scores` - Score matrix (n × ncomp)
/// * `eigenvalues` - Eigenvalues (length ncomp)
///
/// # Example
///
/// ```
/// use fdars_core::matrix::FdMatrix;
/// use fdars_core::spm::contrib::t2_pc_contributions;
/// let scores = FdMatrix::from_column_major(vec![3.0, 1.0, 2.0, 0.5], 2, 2).unwrap();
/// let eigenvalues = vec![2.0, 1.0];
/// let contrib = t2_pc_contributions(&scores, &eigenvalues).unwrap();
/// // Row sums equal Hotelling T²
/// let t2_obs0 = contrib[(0, 0)] + contrib[(0, 1)]; // 3²/2 + 2²/1 = 8.5
/// assert!((t2_obs0 - 8.5).abs() < 1e-10);
/// ```
///
/// # Errors
///
/// Returns [`FdarError::InvalidDimension`] if shapes are inconsistent.
/// Returns [`FdarError::InvalidParameter`] if any eigenvalue is non-positive.
#[must_use = "contributions should not be discarded"]
pub fn t2_pc_contributions(scores: &FdMatrix, eigenvalues: &[f64]) -> Result<FdMatrix, FdarError> {
    let (n, ncomp) = scores.shape();
    if ncomp != eigenvalues.len() {
        return Err(FdarError::InvalidDimension {
            parameter: "eigenvalues",
            expected: format!("{ncomp}"),
            actual: format!("{}", eigenvalues.len()),
        });
    }
    for (l, &ev) in eigenvalues.iter().enumerate() {
        if ev <= 0.0 {
            return Err(FdarError::InvalidParameter {
                parameter: "eigenvalues",
                message: format!("eigenvalue[{l}] = {ev} must be positive"),
            });
        }
    }

    let mut contrib = FdMatrix::zeros(n, ncomp);
    for i in 0..n {
        for l in 0..ncomp {
            contrib[(i, l)] = scores[(i, l)] * scores[(i, l)] / eigenvalues[l];
        }
    }

    Ok(contrib)
}

/// Compute per-variable T-squared contributions using MFPCA eigenfunctions.
///
/// This is the recommended method for multivariate SPM. Uses the squared
/// loading norms from each variable's block of the MFPCA eigenfunctions to
/// weight each PC's contribution. This is more accurate than grid-size
/// proportional allocation (in [`t2_contributions`]) because it accounts for
/// how much each variable actually contributes to each principal component.
/// The proportional allocation in `t2_contributions` is a fast approximation
/// suitable for variables with similar eigenfunction structure.
///
/// Returns an n × p matrix where element (i, v) is the T² contribution
/// of variable v for observation i.
///
/// # Arguments
/// * `scores` - Score matrix (n × ncomp)
/// * `mfpca` - MFPCA result (provides eigenvalues and eigenfunctions)
///
/// # Errors
///
/// Returns [`FdarError::InvalidDimension`] if shapes are inconsistent.
#[must_use = "contributions should not be discarded"]
pub fn t2_contributions_mfpca(
    scores: &FdMatrix,
    mfpca: &MfpcaResult,
) -> Result<FdMatrix, FdarError> {
    let (n, ncomp) = scores.shape();
    let eigenvalues = &mfpca.eigenvalues;

    if ncomp > eigenvalues.len() {
        return Err(FdarError::InvalidDimension {
            parameter: "scores",
            expected: format!("at most {} columns", eigenvalues.len()),
            actual: format!("{ncomp} columns"),
        });
    }

    let p = mfpca.eigenfunctions.len();
    if p == 0 {
        return Err(FdarError::InvalidDimension {
            parameter: "eigenfunctions",
            expected: "at least 1 variable".to_string(),
            actual: "0 variables".to_string(),
        });
    }

    // Compute loading weights: for each PC l and variable v,
    // weight[v][l] = sum_j (eigenfunction_v[j, l])^2 / sum_v' sum_j (eigenfunction_v'[j, l])^2
    let mut weights = vec![vec![0.0_f64; ncomp]; p];
    for l in 0..ncomp {
        let mut total_norm = 0.0;
        let mut var_norms = vec![0.0_f64; p];
        for (v, ef) in mfpca.eigenfunctions.iter().enumerate() {
            let m_v = ef.nrows();
            let ef_ncomp = ef.ncols().min(ncomp);
            if l < ef_ncomp {
                let norm_sq: f64 = (0..m_v).map(|j| ef[(j, l)] * ef[(j, l)]).sum();
                var_norms[v] = norm_sq;
                total_norm += norm_sq;
            }
        }
        if total_norm > 0.0 {
            for v in 0..p {
                weights[v][l] = var_norms[v] / total_norm;
            }
        }
    }

    // Compute contributions
    let mut contrib = FdMatrix::zeros(n, p);
    for i in 0..n {
        for l in 0..ncomp {
            if eigenvalues[l] <= 0.0 {
                continue;
            }
            let comp_contrib = scores[(i, l)] * scores[(i, l)] / eigenvalues[l];
            for v in 0..p {
                contrib[(i, v)] += comp_contrib * weights[v][l];
            }
        }
    }

    Ok(contrib)
}

/// Test per-PC T² contributions for Bonferroni-adjusted significance.
///
/// Returns an n × ncomp matrix of booleans (as 0.0/1.0) indicating whether
/// each PC's contribution exceeds the Bonferroni-corrected threshold
/// `chi²(1, 1 - alpha/ncomp)`. This helps identify which principal components
/// drive an overall T² alarm without inflating the family-wise error rate.
///
/// The Bonferroni correction ensures P(any false positive) <= alpha across
/// all ncomp tests. For ncomp > 10, consider Holm-Bonferroni (step-down)
/// which has identical family-wise error control but higher power: reject
/// if sorted p-values satisfy p_(k) <= alpha/(ncomp - k + 1).
///
/// This is conservative: with many PCs, the per-component threshold increases
/// and individual effects may be missed. For ncomp > 10, consider using
/// Holm-Bonferroni (step-down) or Benjamini-Hochberg (FDR) procedures
/// externally on the per-PC p-values. For exploratory analysis where
/// family-wise error control is less critical, consider using alpha directly
/// (without Bonferroni correction) and interpreting results as suggestive.
///
/// # Arguments
/// * `contributions` - Per-PC T² contributions (n × ncomp), from [`t2_pc_contributions`]
/// * `alpha` - Family-wise significance level (e.g. 0.05)
///
/// # Example
///
/// ```
/// use fdars_core::matrix::FdMatrix;
/// use fdars_core::spm::contrib::t2_pc_significance;
/// let contributions = FdMatrix::from_column_major(vec![50.0, 0.1], 1, 2).unwrap();
/// let sig = t2_pc_significance(&contributions, 0.05).unwrap();
/// assert_eq!(sig[(0, 0)], 1.0); // large contribution -> significant
/// assert_eq!(sig[(0, 1)], 0.0); // small contribution -> not significant
/// ```
///
/// # Errors
///
/// Returns [`FdarError::InvalidParameter`] if alpha is not in (0, 1).
#[must_use = "significance result should not be discarded"]
pub fn t2_pc_significance(contributions: &FdMatrix, alpha: f64) -> Result<FdMatrix, FdarError> {
    if alpha <= 0.0 || alpha >= 1.0 {
        return Err(FdarError::InvalidParameter {
            parameter: "alpha",
            message: format!("alpha must be in (0, 1), got {alpha}"),
        });
    }

    let (n, ncomp) = contributions.shape();
    // Bonferroni correction: test each PC at α/ncomp
    let threshold = chi2_quantile(1.0 - alpha / ncomp as f64, 1);

    let mut significant = FdMatrix::zeros(n, ncomp);
    for i in 0..n {
        for l in 0..ncomp {
            if contributions[(i, l)] > threshold {
                significant[(i, l)] = 1.0;
            }
        }
    }

    Ok(significant)
}