1use super::helpers::project_scores;
4use crate::error::FdarError;
5use crate::matrix::FdMatrix;
6use crate::scalar_on_function::{
7 build_design_matrix, cholesky_factor, cholesky_forward_back, compute_hat_diagonal, compute_xtx,
8 FregreLmResult, FunctionalLogisticResult,
9};
10
/// Variance inflation factors for the predictors of a functional regression fit.
///
/// Produced by [`fpc_vif`] / [`fpc_vif_logistic`]. Predictors are ordered as the
/// FPC scores first, followed by any scalar covariates (see `labels`).
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct VifResult {
    /// One VIF value per predictor, parallel to `labels`.
    pub vif: Vec<f64>,
    /// Predictor names: `FPC_{k}` for each component, then `scalar_{j}` for
    /// each scalar covariate.
    pub labels: Vec<String>,
    /// Arithmetic mean of all VIF values.
    pub mean_vif: f64,
    /// Count of predictors with VIF > 5 (this includes the severe ones too).
    pub n_moderate: usize,
    /// Count of predictors with VIF > 10.
    pub n_severe: usize,
}
30
/// Variance inflation factors for the predictors of a fitted functional
/// linear model (`FregreLmResult`).
///
/// Delegates to the shared implementation in `crate::explain_generic::generic_vif`.
///
/// # Errors
///
/// Propagates any error returned by the generic VIF computation (e.g. invalid
/// dimensions or a failed factorization).
#[must_use = "expensive computation whose result should not be discarded"]
pub fn fpc_vif(
    fit: &FregreLmResult,
    data: &FdMatrix,
    scalar_covariates: Option<&FdMatrix>,
) -> Result<VifResult, FdarError> {
    crate::explain_generic::generic_vif(fit, data, scalar_covariates)
}
46
/// Variance inflation factors for the predictors of a fitted functional
/// logistic model (`FunctionalLogisticResult`).
///
/// Delegates to the shared implementation in `crate::explain_generic::generic_vif`.
///
/// # Errors
///
/// Propagates any error returned by the generic VIF computation (e.g. invalid
/// dimensions or a failed factorization).
#[must_use = "expensive computation whose result should not be discarded"]
pub fn fpc_vif_logistic(
    fit: &FunctionalLogisticResult,
    data: &FdMatrix,
    scalar_covariates: Option<&FdMatrix>,
) -> Result<VifResult, FdarError> {
    crate::explain_generic::generic_vif(fit, data, scalar_covariates)
}
60
61pub(crate) fn compute_vif_from_scores(
62 scores: &FdMatrix,
63 ncomp: usize,
64 scalar_covariates: Option<&FdMatrix>,
65 n: usize,
66) -> Result<VifResult, FdarError> {
67 let p_scalar = scalar_covariates.map_or(0, super::super::matrix::FdMatrix::ncols);
68 let p = ncomp + p_scalar;
69 if p == 0 || n <= p {
70 return Err(FdarError::InvalidDimension {
71 parameter: "scores",
72 expected: format!("n > p (p={p})"),
73 actual: format!("n={n}"),
74 });
75 }
76
77 let x_noi = build_no_intercept_matrix(scores, ncomp, scalar_covariates, n);
78 let xtx = compute_xtx(&x_noi);
79 let l = cholesky_factor(&xtx, p)?;
80
81 let mut vif = vec![0.0; p];
82 for k in 0..p {
83 let mut ek = vec![0.0; p];
84 ek[k] = 1.0;
85 let v = cholesky_forward_back(&l, &ek, p);
86 vif[k] = v[k] * xtx[k * p + k];
87 }
88
89 let mut labels = Vec::with_capacity(p);
90 for k in 0..ncomp {
91 labels.push(format!("FPC_{k}"));
92 }
93 for j in 0..p_scalar {
94 labels.push(format!("scalar_{j}"));
95 }
96
97 let mean_vif = vif.iter().sum::<f64>() / p as f64;
98 let n_moderate = vif.iter().filter(|&&v| v > 5.0).count();
99 let n_severe = vif.iter().filter(|&&v| v > 10.0).count();
100
101 Ok(VifResult {
102 vif,
103 labels,
104 mean_vif,
105 n_moderate,
106 n_severe,
107 })
108}
109
110fn build_no_intercept_matrix(
112 scores: &FdMatrix,
113 ncomp: usize,
114 scalar_covariates: Option<&FdMatrix>,
115 n: usize,
116) -> FdMatrix {
117 let p_scalar = scalar_covariates.map_or(0, super::super::matrix::FdMatrix::ncols);
118 let p = ncomp + p_scalar;
119 let mut x = FdMatrix::zeros(n, p);
120 for i in 0..n {
121 for k in 0..ncomp {
122 x[(i, k)] = scores[(i, k)];
123 }
124 if let Some(sc) = scalar_covariates {
125 for j in 0..p_scalar {
126 x[(i, ncomp + j)] = sc[(i, j)];
127 }
128 }
129 }
130 x
131}
132
/// Leverage and Cook's distance diagnostics for a fitted functional linear model.
///
/// Produced by [`influence_diagnostics`].
#[derive(Debug, Clone, PartialEq)]
// Consistency fix: every other public result struct in this module
// (`VifResult`, `DfbetasDffitsResult`, `PredictionIntervalResult`,
// `LooCvResult`) is `#[non_exhaustive]`; this one was missing the attribute.
#[non_exhaustive]
pub struct InfluenceDiagnostics {
    /// Hat-matrix diagonal h_ii, one entry per observation.
    pub leverage: Vec<f64>,
    /// Cook's distance D_i = e_i^2 h_i / (p * MSE * (1 - h_i)^2), one entry
    /// per observation (0 when the denominator is not positive).
    pub cooks_distance: Vec<f64>,
    /// Number of columns in the design matrix used for the fit.
    pub p: usize,
    /// Residual mean square, SS_res / (n - p).
    pub mse: f64,
}
149
150#[must_use = "expensive computation whose result should not be discarded"]
159pub fn influence_diagnostics(
160 fit: &FregreLmResult,
161 data: &FdMatrix,
162 scalar_covariates: Option<&FdMatrix>,
163) -> Result<InfluenceDiagnostics, FdarError> {
164 let (n, m) = data.shape();
165 if n == 0 {
166 return Err(FdarError::InvalidDimension {
167 parameter: "data",
168 expected: ">0 rows".into(),
169 actual: "0".into(),
170 });
171 }
172 if m != fit.fpca.mean.len() {
173 return Err(FdarError::InvalidDimension {
174 parameter: "data",
175 expected: format!("{} columns", fit.fpca.mean.len()),
176 actual: format!("{m}"),
177 });
178 }
179 let ncomp = fit.ncomp;
180 let scores = project_scores(
181 data,
182 &fit.fpca.mean,
183 &fit.fpca.rotation,
184 ncomp,
185 &fit.fpca.weights,
186 );
187 let design = build_design_matrix(&scores, ncomp, scalar_covariates, n);
188 let p = design.ncols();
189
190 if n <= p {
191 return Err(FdarError::InvalidDimension {
192 parameter: "data",
193 expected: format!(">{p} rows (more than parameters)"),
194 actual: format!("{n}"),
195 });
196 }
197
198 let xtx = compute_xtx(&design);
199 let l = cholesky_factor(&xtx, p)?;
200 let leverage = compute_hat_diagonal(&design, &l);
201
202 let ss_res: f64 = fit.residuals.iter().map(|r| r * r).sum();
203 let mse = ss_res / (n - p) as f64;
204
205 let mut cooks_distance = vec![0.0; n];
206 for i in 0..n {
207 let e = fit.residuals[i];
208 let h = leverage[i];
209 let denom = p as f64 * mse * (1.0 - h).powi(2);
210 cooks_distance[i] = if denom > 0.0 { e * e * h / denom } else { 0.0 };
211 }
212
213 Ok(InfluenceDiagnostics {
214 leverage,
215 cooks_distance,
216 p,
217 mse,
218 })
219}
220
/// Per-observation influence measures DFBETAS and DFFITS for a fitted
/// functional linear model.
///
/// Produced by [`dfbetas_dffits`]. Values here are computed from the full-fit
/// residuals, leverage, and coefficient standard errors (not an exact
/// leave-one-out refit) — see `compute_obs_influence`.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct DfbetasDffitsResult {
    /// `n x p` matrix of DFBETAS: row i, column j measures the (scaled)
    /// influence of observation i on coefficient j.
    pub dfbetas: FdMatrix,
    /// DFFITS value per observation: t_i * sqrt(h_i / (1 - h_i)).
    pub dffits: Vec<f64>,
    /// Internally studentized residuals e_i / (s * sqrt(1 - h_i)).
    pub studentized_residuals: Vec<f64>,
    /// Number of design-matrix columns (parameters).
    pub p: usize,
    /// Common flag threshold for |DFBETAS|: 2 / sqrt(n).
    pub dfbetas_cutoff: f64,
    /// Common flag threshold for |DFFITS|: 2 * sqrt(p / n).
    pub dffits_cutoff: f64,
}
242
243#[must_use = "expensive computation whose result should not be discarded"]
256pub fn dfbetas_dffits(
257 fit: &FregreLmResult,
258 data: &FdMatrix,
259 scalar_covariates: Option<&FdMatrix>,
260) -> Result<DfbetasDffitsResult, FdarError> {
261 let (n, m) = data.shape();
262 if n == 0 {
263 return Err(FdarError::InvalidDimension {
264 parameter: "data",
265 expected: ">0 rows".into(),
266 actual: "0".into(),
267 });
268 }
269 if m != fit.fpca.mean.len() {
270 return Err(FdarError::InvalidDimension {
271 parameter: "data",
272 expected: format!("{} columns", fit.fpca.mean.len()),
273 actual: format!("{m}"),
274 });
275 }
276 let ncomp = fit.ncomp;
277 let scores = project_scores(
278 data,
279 &fit.fpca.mean,
280 &fit.fpca.rotation,
281 ncomp,
282 &fit.fpca.weights,
283 );
284 let design = build_design_matrix(&scores, ncomp, scalar_covariates, n);
285 let p = design.ncols();
286
287 if n <= p {
288 return Err(FdarError::InvalidDimension {
289 parameter: "data",
290 expected: format!(">{p} rows (more than parameters)"),
291 actual: format!("{n}"),
292 });
293 }
294
295 let xtx = compute_xtx(&design);
296 let l = cholesky_factor(&xtx, p)?;
297 let hat_diag = compute_hat_diagonal(&design, &l);
298
299 let ss_res: f64 = fit.residuals.iter().map(|r| r * r).sum();
300 let mse = ss_res / (n - p) as f64;
301 let s = mse.sqrt();
302
303 if s < 1e-15 {
304 return Err(FdarError::ComputationFailed {
305 operation: "dfbetas_dffits",
306 detail: "residual standard error is near zero; the model may be overfitting (perfect fit) — try reducing ncomp".into(),
307 });
308 }
309
310 let se = compute_coefficient_se(&l, mse, p);
311
312 let mut studentized_residuals = vec![0.0; n];
313 let mut dffits = vec![0.0; n];
314 let mut dfbetas = FdMatrix::zeros(n, p);
315
316 for i in 0..n {
317 let (t_i, dffits_i, dfb) =
318 compute_obs_influence(&design, &l, fit.residuals[i], hat_diag[i], s, &se, p, i);
319 studentized_residuals[i] = t_i;
320 dffits[i] = dffits_i;
321 for j in 0..p {
322 dfbetas[(i, j)] = dfb[j];
323 }
324 }
325
326 let dfbetas_cutoff = 2.0 / (n as f64).sqrt();
327 let dffits_cutoff = 2.0 * (p as f64 / n as f64).sqrt();
328
329 Ok(DfbetasDffitsResult {
330 dfbetas,
331 dffits,
332 studentized_residuals,
333 p,
334 dfbetas_cutoff,
335 dffits_cutoff,
336 })
337}
338
339fn compute_coefficient_se(l: &[f64], mse: f64, p: usize) -> Vec<f64> {
341 let mut se = vec![0.0; p];
342 for j in 0..p {
343 let mut ej = vec![0.0; p];
344 ej[j] = 1.0;
345 let v = cholesky_forward_back(l, &ej, p);
346 se[j] = (mse * v[j].max(0.0)).sqrt();
347 }
348 se
349}
350
351fn compute_obs_influence(
353 design: &FdMatrix,
354 l: &[f64],
355 residual: f64,
356 h_ii: f64,
357 s: f64,
358 se: &[f64],
359 p: usize,
360 i: usize,
361) -> (f64, f64, Vec<f64>) {
362 let one_minus_h = (1.0 - h_ii).max(1e-15);
363 let t_i = residual / (s * one_minus_h.sqrt());
364 let dffits_i = t_i * (h_ii / one_minus_h).sqrt();
365
366 let mut xi = vec![0.0; p];
367 for j in 0..p {
368 xi[j] = design[(i, j)];
369 }
370 let inv_xtx_xi = cholesky_forward_back(l, &xi, p);
371 let mut dfb = vec![0.0; p];
372 for j in 0..p {
373 if se[j] > 1e-15 {
374 dfb[j] = inv_xtx_xi[j] * residual / (one_minus_h * se[j]);
375 }
376 }
377
378 (t_i, dffits_i, dfb)
379}
380
/// Point predictions with prediction intervals for new functional observations.
///
/// Produced by [`prediction_intervals`]. All per-observation vectors are
/// parallel (one entry per row of the new data).
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct PredictionIntervalResult {
    /// Point prediction x'beta for each new observation.
    pub predictions: Vec<f64>,
    /// Lower interval bound: prediction - t_critical * prediction_se.
    pub lower: Vec<f64>,
    /// Upper interval bound: prediction + t_critical * prediction_se.
    pub upper: Vec<f64>,
    /// Per-observation prediction standard error,
    /// residual_se * sqrt(1 + x'(X'X)^{-1}x).
    pub prediction_se: Vec<f64>,
    /// The confidence level that was requested (in (0, 1)).
    pub confidence_level: f64,
    /// Two-sided t critical value used for the interval half-width.
    pub t_critical: f64,
    /// Residual standard error taken from the fitted model.
    pub residual_se: f64,
}
404
405#[must_use = "prediction result should not be discarded"]
418pub fn prediction_intervals(
419 fit: &FregreLmResult,
420 train_data: &FdMatrix,
421 train_scalar: Option<&FdMatrix>,
422 new_data: &FdMatrix,
423 new_scalar: Option<&FdMatrix>,
424 confidence_level: f64,
425) -> Result<PredictionIntervalResult, FdarError> {
426 let (n_train, m) = train_data.shape();
427 let (n_new, m_new) = new_data.shape();
428 if confidence_level <= 0.0 || confidence_level >= 1.0 {
429 return Err(FdarError::InvalidParameter {
430 parameter: "confidence_level",
431 message: "must be in (0, 1)".into(),
432 });
433 }
434 if n_train == 0 {
435 return Err(FdarError::InvalidDimension {
436 parameter: "train_data",
437 expected: ">0 rows".into(),
438 actual: "0".into(),
439 });
440 }
441 if m != fit.fpca.mean.len() {
442 return Err(FdarError::InvalidDimension {
443 parameter: "train_data",
444 expected: format!("{} columns", fit.fpca.mean.len()),
445 actual: format!("{m}"),
446 });
447 }
448 if n_new == 0 {
449 return Err(FdarError::InvalidDimension {
450 parameter: "new_data",
451 expected: ">0 rows".into(),
452 actual: "0".into(),
453 });
454 }
455 if m_new != m {
456 return Err(FdarError::InvalidDimension {
457 parameter: "new_data",
458 expected: format!("{m} columns (matching train)"),
459 actual: format!("{m_new}"),
460 });
461 }
462 let ncomp = fit.ncomp;
463
464 let train_scores = project_scores(
465 train_data,
466 &fit.fpca.mean,
467 &fit.fpca.rotation,
468 ncomp,
469 &fit.fpca.weights,
470 );
471 let train_design = build_design_matrix(&train_scores, ncomp, train_scalar, n_train);
472 let p = train_design.ncols();
473 if n_train <= p {
474 return Err(FdarError::InvalidDimension {
475 parameter: "train_data",
476 expected: format!(">{p} rows (more than parameters)"),
477 actual: format!("{n_train}"),
478 });
479 }
480
481 let xtx = compute_xtx(&train_design);
482 let l = cholesky_factor(&xtx, p)?;
483
484 let residual_se = fit.residual_se;
485 let df = n_train - p;
486 let t_crit = t_critical_value(confidence_level, df);
487
488 let new_scores = project_scores(
490 new_data,
491 &fit.fpca.mean,
492 &fit.fpca.rotation,
493 ncomp,
494 &fit.fpca.weights,
495 );
496
497 let mut predictions = vec![0.0; n_new];
498 let mut lower = vec![0.0; n_new];
499 let mut upper = vec![0.0; n_new];
500 let mut prediction_se = vec![0.0; n_new];
501
502 let p_scalar = fit.gamma.len();
503
504 for i in 0..n_new {
505 let x_new = build_design_vector(&new_scores, new_scalar, i, ncomp, p_scalar, p);
506 let (yhat, lo, up, pse) =
507 compute_prediction_interval_obs(&l, &fit.coefficients, &x_new, p, residual_se, t_crit);
508 predictions[i] = yhat;
509 lower[i] = lo;
510 upper[i] = up;
511 prediction_se[i] = pse;
512 }
513
514 Ok(PredictionIntervalResult {
515 predictions,
516 lower,
517 upper,
518 prediction_se,
519 confidence_level,
520 t_critical: t_crit,
521 residual_se,
522 })
523}
524
/// Standard-normal quantile via the Abramowitz & Stegun 26.2.23 rational
/// approximation (absolute error < 4.5e-4).
///
/// Out-of-range inputs (p <= 0 or p >= 1) return 0.0 rather than ±infinity.
fn normal_quantile(p: f64) -> f64 {
    if p <= 0.0 || p >= 1.0 {
        return 0.0;
    }
    // Work with the smaller tail; negate the result for the lower half.
    let (tail, lower_half) = if p < 0.5 { (p, true) } else { (1.0 - p, false) };
    let t = (-2.0 * tail.ln()).sqrt();

    // A&S 26.2.23 coefficients.
    let c0 = 2.515_517;
    let c1 = 0.802_853;
    let c2 = 0.010_328;
    let d1 = 1.432_788;
    let d2 = 0.189_269;
    let d3 = 0.001_308;

    let q = t - (c0 + c1 * t + c2 * t * t) / (1.0 + d1 * t + d2 * t * t + d3 * t * t * t);
    if lower_half { -q } else { q }
}
548
549fn t_critical_value(conf: f64, df: usize) -> f64 {
551 let alpha = 1.0 - conf;
552 let z = normal_quantile(1.0 - alpha / 2.0);
553 if df == 0 {
554 return z;
555 }
556 let df_f = df as f64;
557 let g1 = (z.powi(3) + z) / (4.0 * df_f);
558 let g2 = (5.0 * z.powi(5) + 16.0 * z.powi(3) + 3.0 * z) / (96.0 * df_f * df_f);
559 let g3 = (3.0 * z.powi(7) + 19.0 * z.powi(5) + 17.0 * z.powi(3) - 15.0 * z)
560 / (384.0 * df_f * df_f * df_f);
561 z + g1 + g2 + g3
562}
563
564fn build_design_vector(
566 new_scores: &FdMatrix,
567 new_scalar: Option<&FdMatrix>,
568 i: usize,
569 ncomp: usize,
570 p_scalar: usize,
571 p: usize,
572) -> Vec<f64> {
573 let mut x = vec![0.0; p];
574 x[0] = 1.0;
575 for k in 0..ncomp {
576 x[1 + k] = new_scores[(i, k)];
577 }
578 if let Some(ns) = new_scalar {
579 for j in 0..p_scalar {
580 x[1 + ncomp + j] = ns[(i, j)];
581 }
582 }
583 x
584}
585
586fn compute_prediction_interval_obs(
588 l: &[f64],
589 coefficients: &[f64],
590 x_new: &[f64],
591 p: usize,
592 residual_se: f64,
593 t_crit: f64,
594) -> (f64, f64, f64, f64) {
595 let yhat: f64 = x_new.iter().zip(coefficients).map(|(a, b)| a * b).sum();
596 let v = cholesky_forward_back(l, x_new, p);
597 let h_new: f64 = x_new.iter().zip(&v).map(|(a, b)| a * b).sum();
598 let pred_se = residual_se * (1.0 + h_new).sqrt();
599 (
600 yhat,
601 yhat - t_crit * pred_se,
602 yhat + t_crit * pred_se,
603 pred_se,
604 )
605}
606
/// Leave-one-out cross-validation summary (PRESS) for a fitted functional
/// linear model.
///
/// Produced by [`loo_cv_press`]. LOO residuals are computed analytically from
/// the leverage, not by refitting n times.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct LooCvResult {
    /// Leave-one-out residual per observation: e_i / (1 - h_i).
    pub loo_residuals: Vec<f64>,
    /// PRESS statistic: sum of squared LOO residuals.
    pub press: f64,
    /// Predictive R²: 1 - PRESS / TSS.
    pub loo_r_squared: f64,
    /// Hat-matrix diagonal h_i per observation.
    pub leverage: Vec<f64>,
    /// Total sum of squares of the response around its mean.
    pub tss: f64,
}
626
627#[must_use = "expensive computation whose result should not be discarded"]
639pub fn loo_cv_press(
640 fit: &FregreLmResult,
641 data: &FdMatrix,
642 y: &[f64],
643 scalar_covariates: Option<&FdMatrix>,
644) -> Result<LooCvResult, FdarError> {
645 let (n, m) = data.shape();
646 if n == 0 {
647 return Err(FdarError::InvalidDimension {
648 parameter: "data",
649 expected: ">0 rows".into(),
650 actual: "0".into(),
651 });
652 }
653 if n != y.len() {
654 return Err(FdarError::InvalidDimension {
655 parameter: "y",
656 expected: format!("{n} (matching data rows)"),
657 actual: format!("{}", y.len()),
658 });
659 }
660 if m != fit.fpca.mean.len() {
661 return Err(FdarError::InvalidDimension {
662 parameter: "data",
663 expected: format!("{} columns", fit.fpca.mean.len()),
664 actual: format!("{m}"),
665 });
666 }
667 let ncomp = fit.ncomp;
668 let scores = project_scores(
669 data,
670 &fit.fpca.mean,
671 &fit.fpca.rotation,
672 ncomp,
673 &fit.fpca.weights,
674 );
675 let design = build_design_matrix(&scores, ncomp, scalar_covariates, n);
676 let p = design.ncols();
677 if n <= p {
678 return Err(FdarError::InvalidDimension {
679 parameter: "data",
680 expected: format!(">{p} rows (more than parameters)"),
681 actual: format!("{n}"),
682 });
683 }
684
685 let xtx = compute_xtx(&design);
686 let l = cholesky_factor(&xtx, p)?;
687 let leverage = compute_hat_diagonal(&design, &l);
688
689 let y_mean: f64 = y.iter().sum::<f64>() / n as f64;
690 let tss: f64 = y.iter().map(|&yi| (yi - y_mean).powi(2)).sum();
691 if tss == 0.0 {
692 return Err(FdarError::ComputationFailed {
693 operation: "loo_cv_press",
694 detail: "total sum of squares is zero; all response values may be identical — check your data".into(),
695 });
696 }
697
698 let mut loo_residuals = vec![0.0; n];
699 let mut press = 0.0;
700 for i in 0..n {
701 let denom = (1.0 - leverage[i]).max(1e-15);
702 loo_residuals[i] = fit.residuals[i] / denom;
703 press += loo_residuals[i] * loo_residuals[i];
704 }
705
706 let loo_r_squared = 1.0 - press / tss;
707
708 Ok(LooCvResult {
709 loo_residuals,
710 press,
711 loo_r_squared,
712 leverage,
713 tss,
714 })
715}