1use super::helpers::project_scores;
4use crate::error::FdarError;
5use crate::matrix::FdMatrix;
6use crate::scalar_on_function::{
7 build_design_matrix, cholesky_factor, cholesky_forward_back, compute_hat_diagonal, compute_xtx,
8 FregreLmResult, FunctionalLogisticResult,
9};
10
/// Variance inflation factor (VIF) diagnostics for a model's predictors
/// (FPC scores followed by any scalar covariates).
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct VifResult {
    /// One VIF per predictor, in the same order as `labels`.
    pub vif: Vec<f64>,
    /// Predictor labels: `FPC_k` entries first, then `scalar_j` entries.
    pub labels: Vec<String>,
    /// Arithmetic mean of `vif`.
    pub mean_vif: f64,
    /// Count of predictors with VIF > 5 (also counts the severe ones).
    pub n_moderate: usize,
    /// Count of predictors with VIF > 10.
    pub n_severe: usize,
}
30
/// Variance inflation factors for the predictors of a fitted functional
/// linear model; thin wrapper delegating to `explain_generic::generic_vif`.
///
/// # Errors
/// Propagates any error produced by the generic VIF computation.
#[must_use = "expensive computation whose result should not be discarded"]
pub fn fpc_vif(
    fit: &FregreLmResult,
    data: &FdMatrix,
    scalar_covariates: Option<&FdMatrix>,
) -> Result<VifResult, FdarError> {
    crate::explain_generic::generic_vif(fit, data, scalar_covariates)
}
46
/// Variance inflation factors for the predictors of a fitted functional
/// logistic model; thin wrapper delegating to `explain_generic::generic_vif`.
///
/// # Errors
/// Propagates any error produced by the generic VIF computation.
#[must_use = "expensive computation whose result should not be discarded"]
pub fn fpc_vif_logistic(
    fit: &FunctionalLogisticResult,
    data: &FdMatrix,
    scalar_covariates: Option<&FdMatrix>,
) -> Result<VifResult, FdarError> {
    crate::explain_generic::generic_vif(fit, data, scalar_covariates)
}
60
61pub(crate) fn compute_vif_from_scores(
62 scores: &FdMatrix,
63 ncomp: usize,
64 scalar_covariates: Option<&FdMatrix>,
65 n: usize,
66) -> Result<VifResult, FdarError> {
67 let p_scalar = scalar_covariates.map_or(0, super::super::matrix::FdMatrix::ncols);
68 let p = ncomp + p_scalar;
69 if p == 0 || n <= p {
70 return Err(FdarError::InvalidDimension {
71 parameter: "scores",
72 expected: format!("n > p (p={p})"),
73 actual: format!("n={n}"),
74 });
75 }
76
77 let x_noi = build_no_intercept_matrix(scores, ncomp, scalar_covariates, n);
78 let xtx = compute_xtx(&x_noi);
79 let l = cholesky_factor(&xtx, p)?;
80
81 let mut vif = vec![0.0; p];
82 for k in 0..p {
83 let mut ek = vec![0.0; p];
84 ek[k] = 1.0;
85 let v = cholesky_forward_back(&l, &ek, p);
86 vif[k] = v[k] * xtx[k * p + k];
87 }
88
89 let mut labels = Vec::with_capacity(p);
90 for k in 0..ncomp {
91 labels.push(format!("FPC_{k}"));
92 }
93 for j in 0..p_scalar {
94 labels.push(format!("scalar_{j}"));
95 }
96
97 let mean_vif = vif.iter().sum::<f64>() / p as f64;
98 let n_moderate = vif.iter().filter(|&&v| v > 5.0).count();
99 let n_severe = vif.iter().filter(|&&v| v > 10.0).count();
100
101 Ok(VifResult {
102 vif,
103 labels,
104 mean_vif,
105 n_moderate,
106 n_severe,
107 })
108}
109
110fn build_no_intercept_matrix(
112 scores: &FdMatrix,
113 ncomp: usize,
114 scalar_covariates: Option<&FdMatrix>,
115 n: usize,
116) -> FdMatrix {
117 let p_scalar = scalar_covariates.map_or(0, super::super::matrix::FdMatrix::ncols);
118 let p = ncomp + p_scalar;
119 let mut x = FdMatrix::zeros(n, p);
120 for i in 0..n {
121 for k in 0..ncomp {
122 x[(i, k)] = scores[(i, k)];
123 }
124 if let Some(sc) = scalar_covariates {
125 for j in 0..p_scalar {
126 x[(i, ncomp + j)] = sc[(i, j)];
127 }
128 }
129 }
130 x
131}
132
/// Leverage and Cook's distance diagnostics from [`influence_diagnostics`].
// NOTE(review): unlike the sibling result structs, this one is not marked
// `#[non_exhaustive]`; adding it now would break downstream constructors,
// so it is left as-is — confirm whether that asymmetry is intentional.
#[derive(Debug, Clone, PartialEq)]
pub struct InfluenceDiagnostics {
    /// Hat-matrix diagonal h_ii, one entry per observation.
    pub leverage: Vec<f64>,
    /// Cook's distance D_i, one entry per observation.
    pub cooks_distance: Vec<f64>,
    /// Number of design-matrix columns (model parameters).
    pub p: usize,
    /// Residual mean square, SS_res / (n - p).
    pub mse: f64,
}
149
150#[must_use = "expensive computation whose result should not be discarded"]
159pub fn influence_diagnostics(
160 fit: &FregreLmResult,
161 data: &FdMatrix,
162 scalar_covariates: Option<&FdMatrix>,
163) -> Result<InfluenceDiagnostics, FdarError> {
164 let (n, m) = data.shape();
165 if n == 0 {
166 return Err(FdarError::InvalidDimension {
167 parameter: "data",
168 expected: ">0 rows".into(),
169 actual: "0".into(),
170 });
171 }
172 if m != fit.fpca.mean.len() {
173 return Err(FdarError::InvalidDimension {
174 parameter: "data",
175 expected: format!("{} columns", fit.fpca.mean.len()),
176 actual: format!("{m}"),
177 });
178 }
179 let ncomp = fit.ncomp;
180 let scores = project_scores(data, &fit.fpca.mean, &fit.fpca.rotation, ncomp);
181 let design = build_design_matrix(&scores, ncomp, scalar_covariates, n);
182 let p = design.ncols();
183
184 if n <= p {
185 return Err(FdarError::InvalidDimension {
186 parameter: "data",
187 expected: format!(">{p} rows (more than parameters)"),
188 actual: format!("{n}"),
189 });
190 }
191
192 let xtx = compute_xtx(&design);
193 let l = cholesky_factor(&xtx, p)?;
194 let leverage = compute_hat_diagonal(&design, &l);
195
196 let ss_res: f64 = fit.residuals.iter().map(|r| r * r).sum();
197 let mse = ss_res / (n - p) as f64;
198
199 let mut cooks_distance = vec![0.0; n];
200 for i in 0..n {
201 let e = fit.residuals[i];
202 let h = leverage[i];
203 let denom = p as f64 * mse * (1.0 - h).powi(2);
204 cooks_distance[i] = if denom > 0.0 { e * e * h / denom } else { 0.0 };
205 }
206
207 Ok(InfluenceDiagnostics {
208 leverage,
209 cooks_distance,
210 p,
211 mse,
212 })
213}
214
/// DFBETAS / DFFITS influence diagnostics from [`dfbetas_dffits`].
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct DfbetasDffitsResult {
    /// n x p matrix; entry (i, j) is the standardized change in coefficient
    /// j attributable to observation i.
    pub dfbetas: FdMatrix,
    /// DFFITS_i per observation (scaled change in its own fitted value).
    pub dffits: Vec<f64>,
    /// Studentized residuals e_i / (s * sqrt(1 - h_i)).
    pub studentized_residuals: Vec<f64>,
    /// Number of design-matrix columns (model parameters).
    pub p: usize,
    /// Rule-of-thumb flagging cutoff 2 / sqrt(n) for |DFBETAS|.
    pub dfbetas_cutoff: f64,
    /// Rule-of-thumb flagging cutoff 2 * sqrt(p / n) for |DFFITS|.
    pub dffits_cutoff: f64,
}
236
/// DFBETAS, DFFITS, and studentized residuals for a fitted functional
/// linear model, computed from the FPC-score design matrix.
///
/// # Errors
/// Rejects empty data, a column count not matching the fitted FPCA mean
/// length, `n <= p`, and a near-zero residual standard error (perfect fit);
/// propagates Cholesky failures for a singular `X'X`.
#[must_use = "expensive computation whose result should not be discarded"]
pub fn dfbetas_dffits(
    fit: &FregreLmResult,
    data: &FdMatrix,
    scalar_covariates: Option<&FdMatrix>,
) -> Result<DfbetasDffitsResult, FdarError> {
    let (n, m) = data.shape();
    if n == 0 {
        return Err(FdarError::InvalidDimension {
            parameter: "data",
            expected: ">0 rows".into(),
            actual: "0".into(),
        });
    }
    // Columns must match the FPCA basis the model was fitted on.
    if m != fit.fpca.mean.len() {
        return Err(FdarError::InvalidDimension {
            parameter: "data",
            expected: format!("{} columns", fit.fpca.mean.len()),
            actual: format!("{m}"),
        });
    }
    let ncomp = fit.ncomp;
    // Project data onto the fitted FPC basis, then rebuild the design matrix
    // (intercept + scores + optional scalar covariates).
    let scores = project_scores(data, &fit.fpca.mean, &fit.fpca.rotation, ncomp);
    let design = build_design_matrix(&scores, ncomp, scalar_covariates, n);
    let p = design.ncols();

    if n <= p {
        return Err(FdarError::InvalidDimension {
            parameter: "data",
            expected: format!(">{p} rows (more than parameters)"),
            actual: format!("{n}"),
        });
    }

    // Hat-matrix diagonal via the Cholesky factor of X'X.
    let xtx = compute_xtx(&design);
    let l = cholesky_factor(&xtx, p)?;
    let hat_diag = compute_hat_diagonal(&design, &l);

    // Residual mean square (n - p degrees of freedom) and its square root.
    let ss_res: f64 = fit.residuals.iter().map(|r| r * r).sum();
    let mse = ss_res / (n - p) as f64;
    let s = mse.sqrt();

    // A (near-)perfect fit makes every studentized quantity blow up, so it
    // is rejected explicitly rather than returning infinities.
    if s < 1e-15 {
        return Err(FdarError::ComputationFailed {
            operation: "dfbetas_dffits",
            detail: "residual standard error is near zero; the model may be overfitting (perfect fit) — try reducing ncomp".into(),
        });
    }

    // Per-coefficient standard errors: sqrt(mse * [(X'X)^{-1}]_jj).
    let se = compute_coefficient_se(&l, mse, p);

    let mut studentized_residuals = vec![0.0; n];
    let mut dffits = vec![0.0; n];
    let mut dfbetas = FdMatrix::zeros(n, p);

    // Compute the three influence measures observation by observation.
    for i in 0..n {
        let (t_i, dffits_i, dfb) =
            compute_obs_influence(&design, &l, fit.residuals[i], hat_diag[i], s, &se, p, i);
        studentized_residuals[i] = t_i;
        dffits[i] = dffits_i;
        for j in 0..p {
            dfbetas[(i, j)] = dfb[j];
        }
    }

    // Conventional flagging thresholds: 2/sqrt(n) and 2*sqrt(p/n).
    let dfbetas_cutoff = 2.0 / (n as f64).sqrt();
    let dffits_cutoff = 2.0 * (p as f64 / n as f64).sqrt();

    Ok(DfbetasDffitsResult {
        dfbetas,
        dffits,
        studentized_residuals,
        p,
        dfbetas_cutoff,
        dffits_cutoff,
    })
}
326
327fn compute_coefficient_se(l: &[f64], mse: f64, p: usize) -> Vec<f64> {
329 let mut se = vec![0.0; p];
330 for j in 0..p {
331 let mut ej = vec![0.0; p];
332 ej[j] = 1.0;
333 let v = cholesky_forward_back(l, &ej, p);
334 se[j] = (mse * v[j].max(0.0)).sqrt();
335 }
336 se
337}
338
339fn compute_obs_influence(
341 design: &FdMatrix,
342 l: &[f64],
343 residual: f64,
344 h_ii: f64,
345 s: f64,
346 se: &[f64],
347 p: usize,
348 i: usize,
349) -> (f64, f64, Vec<f64>) {
350 let one_minus_h = (1.0 - h_ii).max(1e-15);
351 let t_i = residual / (s * one_minus_h.sqrt());
352 let dffits_i = t_i * (h_ii / one_minus_h).sqrt();
353
354 let mut xi = vec![0.0; p];
355 for j in 0..p {
356 xi[j] = design[(i, j)];
357 }
358 let inv_xtx_xi = cholesky_forward_back(l, &xi, p);
359 let mut dfb = vec![0.0; p];
360 for j in 0..p {
361 if se[j] > 1e-15 {
362 dfb[j] = inv_xtx_xi[j] * residual / (one_minus_h * se[j]);
363 }
364 }
365
366 (t_i, dffits_i, dfb)
367}
368
/// Predictions plus t-based prediction intervals from [`prediction_intervals`].
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct PredictionIntervalResult {
    /// Point predictions, one per new observation.
    pub predictions: Vec<f64>,
    /// Lower interval bounds (prediction - t * se).
    pub lower: Vec<f64>,
    /// Upper interval bounds (prediction + t * se).
    pub upper: Vec<f64>,
    /// Per-observation prediction standard errors.
    pub prediction_se: Vec<f64>,
    /// Confidence level supplied by the caller, in (0, 1).
    pub confidence_level: f64,
    /// Two-sided t critical value used for the interval half-width.
    pub t_critical: f64,
    /// Residual standard error copied from the fit.
    pub residual_se: f64,
}
392
393#[must_use = "prediction result should not be discarded"]
406pub fn prediction_intervals(
407 fit: &FregreLmResult,
408 train_data: &FdMatrix,
409 train_scalar: Option<&FdMatrix>,
410 new_data: &FdMatrix,
411 new_scalar: Option<&FdMatrix>,
412 confidence_level: f64,
413) -> Result<PredictionIntervalResult, FdarError> {
414 let (n_train, m) = train_data.shape();
415 let (n_new, m_new) = new_data.shape();
416 if confidence_level <= 0.0 || confidence_level >= 1.0 {
417 return Err(FdarError::InvalidParameter {
418 parameter: "confidence_level",
419 message: "must be in (0, 1)".into(),
420 });
421 }
422 if n_train == 0 {
423 return Err(FdarError::InvalidDimension {
424 parameter: "train_data",
425 expected: ">0 rows".into(),
426 actual: "0".into(),
427 });
428 }
429 if m != fit.fpca.mean.len() {
430 return Err(FdarError::InvalidDimension {
431 parameter: "train_data",
432 expected: format!("{} columns", fit.fpca.mean.len()),
433 actual: format!("{m}"),
434 });
435 }
436 if n_new == 0 {
437 return Err(FdarError::InvalidDimension {
438 parameter: "new_data",
439 expected: ">0 rows".into(),
440 actual: "0".into(),
441 });
442 }
443 if m_new != m {
444 return Err(FdarError::InvalidDimension {
445 parameter: "new_data",
446 expected: format!("{m} columns (matching train)"),
447 actual: format!("{m_new}"),
448 });
449 }
450 let ncomp = fit.ncomp;
451
452 let train_scores = project_scores(train_data, &fit.fpca.mean, &fit.fpca.rotation, ncomp);
453 let train_design = build_design_matrix(&train_scores, ncomp, train_scalar, n_train);
454 let p = train_design.ncols();
455 if n_train <= p {
456 return Err(FdarError::InvalidDimension {
457 parameter: "train_data",
458 expected: format!(">{p} rows (more than parameters)"),
459 actual: format!("{n_train}"),
460 });
461 }
462
463 let xtx = compute_xtx(&train_design);
464 let l = cholesky_factor(&xtx, p)?;
465
466 let residual_se = fit.residual_se;
467 let df = n_train - p;
468 let t_crit = t_critical_value(confidence_level, df);
469
470 let new_scores = project_scores(new_data, &fit.fpca.mean, &fit.fpca.rotation, ncomp);
472
473 let mut predictions = vec![0.0; n_new];
474 let mut lower = vec![0.0; n_new];
475 let mut upper = vec![0.0; n_new];
476 let mut prediction_se = vec![0.0; n_new];
477
478 let p_scalar = fit.gamma.len();
479
480 for i in 0..n_new {
481 let x_new = build_design_vector(&new_scores, new_scalar, i, ncomp, p_scalar, p);
482 let (yhat, lo, up, pse) =
483 compute_prediction_interval_obs(&l, &fit.coefficients, &x_new, p, residual_se, t_crit);
484 predictions[i] = yhat;
485 lower[i] = lo;
486 upper[i] = up;
487 prediction_se[i] = pse;
488 }
489
490 Ok(PredictionIntervalResult {
491 predictions,
492 lower,
493 upper,
494 prediction_se,
495 confidence_level,
496 t_critical: t_crit,
497 residual_se,
498 })
499}
500
/// Approximate standard-normal quantile (inverse CDF) using the
/// Abramowitz–Stegun rational approximation 26.2.23 (|error| < 4.5e-4).
/// Probabilities outside (0, 1) map to 0.0.
fn normal_quantile(p: f64) -> f64 {
    if p <= 0.0 || p >= 1.0 {
        return 0.0;
    }
    // Work in the tail: use p for the lower tail, 1 - p for the upper.
    let lower_tail = p < 0.5;
    let tail = if lower_tail { p } else { 1.0 - p };
    let t = (-2.0 * tail.ln()).sqrt();
    // Rational-polynomial correction coefficients (Abramowitz & Stegun).
    const C: [f64; 3] = [2.515_517, 0.802_853, 0.010_328];
    const D: [f64; 3] = [1.432_788, 0.189_269, 0.001_308];
    let numer = C[0] + t * (C[1] + t * C[2]);
    let denom = 1.0 + t * (D[0] + t * (D[1] + t * D[2]));
    let magnitude = t - numer / denom;
    // Mirror the magnitude into the lower tail when needed.
    if lower_tail {
        -magnitude
    } else {
        magnitude
    }
}
524
525fn t_critical_value(conf: f64, df: usize) -> f64 {
527 let alpha = 1.0 - conf;
528 let z = normal_quantile(1.0 - alpha / 2.0);
529 if df == 0 {
530 return z;
531 }
532 let df_f = df as f64;
533 let g1 = (z.powi(3) + z) / (4.0 * df_f);
534 let g2 = (5.0 * z.powi(5) + 16.0 * z.powi(3) + 3.0 * z) / (96.0 * df_f * df_f);
535 let g3 = (3.0 * z.powi(7) + 19.0 * z.powi(5) + 17.0 * z.powi(3) - 15.0 * z)
536 / (384.0 * df_f * df_f * df_f);
537 z + g1 + g2 + g3
538}
539
540fn build_design_vector(
542 new_scores: &FdMatrix,
543 new_scalar: Option<&FdMatrix>,
544 i: usize,
545 ncomp: usize,
546 p_scalar: usize,
547 p: usize,
548) -> Vec<f64> {
549 let mut x = vec![0.0; p];
550 x[0] = 1.0;
551 for k in 0..ncomp {
552 x[1 + k] = new_scores[(i, k)];
553 }
554 if let Some(ns) = new_scalar {
555 for j in 0..p_scalar {
556 x[1 + ncomp + j] = ns[(i, j)];
557 }
558 }
559 x
560}
561
562fn compute_prediction_interval_obs(
564 l: &[f64],
565 coefficients: &[f64],
566 x_new: &[f64],
567 p: usize,
568 residual_se: f64,
569 t_crit: f64,
570) -> (f64, f64, f64, f64) {
571 let yhat: f64 = x_new.iter().zip(coefficients).map(|(a, b)| a * b).sum();
572 let v = cholesky_forward_back(l, x_new, p);
573 let h_new: f64 = x_new.iter().zip(&v).map(|(a, b)| a * b).sum();
574 let pred_se = residual_se * (1.0 + h_new).sqrt();
575 (
576 yhat,
577 yhat - t_crit * pred_se,
578 yhat + t_crit * pred_se,
579 pred_se,
580 )
581}
582
/// Leave-one-out cross-validation summary from [`loo_cv_press`].
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct LooCvResult {
    /// LOO (PRESS) residuals e_i / (1 - h_i), one per observation.
    pub loo_residuals: Vec<f64>,
    /// Predicted residual sum of squares: sum of squared LOO residuals.
    pub press: f64,
    /// Predictive R^2: 1 - PRESS / TSS.
    pub loo_r_squared: f64,
    /// Hat-matrix diagonal h_ii, one per observation.
    pub leverage: Vec<f64>,
    /// Total sum of squares of the response about its mean.
    pub tss: f64,
}
602
603#[must_use = "expensive computation whose result should not be discarded"]
615pub fn loo_cv_press(
616 fit: &FregreLmResult,
617 data: &FdMatrix,
618 y: &[f64],
619 scalar_covariates: Option<&FdMatrix>,
620) -> Result<LooCvResult, FdarError> {
621 let (n, m) = data.shape();
622 if n == 0 {
623 return Err(FdarError::InvalidDimension {
624 parameter: "data",
625 expected: ">0 rows".into(),
626 actual: "0".into(),
627 });
628 }
629 if n != y.len() {
630 return Err(FdarError::InvalidDimension {
631 parameter: "y",
632 expected: format!("{n} (matching data rows)"),
633 actual: format!("{}", y.len()),
634 });
635 }
636 if m != fit.fpca.mean.len() {
637 return Err(FdarError::InvalidDimension {
638 parameter: "data",
639 expected: format!("{} columns", fit.fpca.mean.len()),
640 actual: format!("{m}"),
641 });
642 }
643 let ncomp = fit.ncomp;
644 let scores = project_scores(data, &fit.fpca.mean, &fit.fpca.rotation, ncomp);
645 let design = build_design_matrix(&scores, ncomp, scalar_covariates, n);
646 let p = design.ncols();
647 if n <= p {
648 return Err(FdarError::InvalidDimension {
649 parameter: "data",
650 expected: format!(">{p} rows (more than parameters)"),
651 actual: format!("{n}"),
652 });
653 }
654
655 let xtx = compute_xtx(&design);
656 let l = cholesky_factor(&xtx, p)?;
657 let leverage = compute_hat_diagonal(&design, &l);
658
659 let y_mean: f64 = y.iter().sum::<f64>() / n as f64;
660 let tss: f64 = y.iter().map(|&yi| (yi - y_mean).powi(2)).sum();
661 if tss == 0.0 {
662 return Err(FdarError::ComputationFailed {
663 operation: "loo_cv_press",
664 detail: "total sum of squares is zero; all response values may be identical — check your data".into(),
665 });
666 }
667
668 let mut loo_residuals = vec![0.0; n];
669 let mut press = 0.0;
670 for i in 0..n {
671 let denom = (1.0 - leverage[i]).max(1e-15);
672 loo_residuals[i] = fit.residuals[i] / denom;
673 press += loo_residuals[i] * loo_residuals[i];
674 }
675
676 let loo_r_squared = 1.0 - press / tss;
677
678 Ok(LooCvResult {
679 loo_residuals,
680 press,
681 loo_r_squared,
682 leverage,
683 tss,
684 })
685}