1use super::helpers::*;
4use crate::error::FdarError;
5use crate::matrix::FdMatrix;
6use crate::scalar_on_function::{
7 build_design_matrix, cholesky_factor, cholesky_forward_back, compute_hat_diagonal, compute_xtx,
8 FregreLmResult, FunctionalLogisticResult,
9};
10
/// Variance inflation factor (VIF) diagnostics for a fitted functional
/// regression model.
#[derive(Debug, Clone, PartialEq)]
pub struct VifResult {
    /// VIF value per predictor, in design-matrix column order
    /// (FPC scores first, then scalar covariates).
    pub vif: Vec<f64>,
    /// Label per entry of `vif` (`"FPC_k"` / `"scalar_j"`).
    pub labels: Vec<String>,
    /// Arithmetic mean of all VIF values.
    pub mean_vif: f64,
    /// Count of predictors with VIF > 5 (severe ones are included here too).
    pub n_moderate: usize,
    /// Count of predictors with VIF > 10.
    pub n_severe: usize,
}
29
/// Variance inflation factors for the predictors of a fitted functional
/// linear model.
///
/// Thin wrapper that delegates to `crate::explain_generic::generic_vif`.
///
/// # Errors
///
/// Propagates any error from `generic_vif` (e.g. dimension mismatches or a
/// failed factorization).
#[must_use = "expensive computation whose result should not be discarded"]
pub fn fpc_vif(
    fit: &FregreLmResult,
    data: &FdMatrix,
    scalar_covariates: Option<&FdMatrix>,
) -> Result<VifResult, FdarError> {
    crate::explain_generic::generic_vif(fit, data, scalar_covariates)
}
45
/// Variance inflation factors for the predictors of a fitted functional
/// logistic model.
///
/// Thin wrapper that delegates to `crate::explain_generic::generic_vif`.
///
/// # Errors
///
/// Propagates any error from `generic_vif` (e.g. dimension mismatches or a
/// failed factorization).
#[must_use = "expensive computation whose result should not be discarded"]
pub fn fpc_vif_logistic(
    fit: &FunctionalLogisticResult,
    data: &FdMatrix,
    scalar_covariates: Option<&FdMatrix>,
) -> Result<VifResult, FdarError> {
    crate::explain_generic::generic_vif(fit, data, scalar_covariates)
}
59
60pub(crate) fn compute_vif_from_scores(
61 scores: &FdMatrix,
62 ncomp: usize,
63 scalar_covariates: Option<&FdMatrix>,
64 n: usize,
65) -> Result<VifResult, FdarError> {
66 let p_scalar = scalar_covariates.map_or(0, super::super::matrix::FdMatrix::ncols);
67 let p = ncomp + p_scalar;
68 if p == 0 || n <= p {
69 return Err(FdarError::InvalidDimension {
70 parameter: "scores",
71 expected: format!("n > p (p={p})"),
72 actual: format!("n={n}"),
73 });
74 }
75
76 let x_noi = build_no_intercept_matrix(scores, ncomp, scalar_covariates, n);
77 let xtx = compute_xtx(&x_noi);
78 let l = cholesky_factor(&xtx, p)?;
79
80 let mut vif = vec![0.0; p];
81 for k in 0..p {
82 let mut ek = vec![0.0; p];
83 ek[k] = 1.0;
84 let v = cholesky_forward_back(&l, &ek, p);
85 vif[k] = v[k] * xtx[k * p + k];
86 }
87
88 let mut labels = Vec::with_capacity(p);
89 for k in 0..ncomp {
90 labels.push(format!("FPC_{k}"));
91 }
92 for j in 0..p_scalar {
93 labels.push(format!("scalar_{j}"));
94 }
95
96 let mean_vif = vif.iter().sum::<f64>() / p as f64;
97 let n_moderate = vif.iter().filter(|&&v| v > 5.0).count();
98 let n_severe = vif.iter().filter(|&&v| v > 10.0).count();
99
100 Ok(VifResult {
101 vif,
102 labels,
103 mean_vif,
104 n_moderate,
105 n_severe,
106 })
107}
108
109fn build_no_intercept_matrix(
111 scores: &FdMatrix,
112 ncomp: usize,
113 scalar_covariates: Option<&FdMatrix>,
114 n: usize,
115) -> FdMatrix {
116 let p_scalar = scalar_covariates.map_or(0, super::super::matrix::FdMatrix::ncols);
117 let p = ncomp + p_scalar;
118 let mut x = FdMatrix::zeros(n, p);
119 for i in 0..n {
120 for k in 0..ncomp {
121 x[(i, k)] = scores[(i, k)];
122 }
123 if let Some(sc) = scalar_covariates {
124 for j in 0..p_scalar {
125 x[(i, ncomp + j)] = sc[(i, j)];
126 }
127 }
128 }
129 x
130}
131
/// Per-observation influence diagnostics for a functional linear model.
#[derive(Debug, Clone, PartialEq)]
pub struct InfluenceDiagnostics {
    /// Hat-matrix diagonal h_ii (leverage) per observation.
    pub leverage: Vec<f64>,
    /// Cook's distance per observation.
    pub cooks_distance: Vec<f64>,
    /// Number of model parameters (design-matrix columns).
    pub p: usize,
    /// Residual mean square, SS_res / (n - p).
    pub mse: f64,
}
148
149#[must_use = "expensive computation whose result should not be discarded"]
158pub fn influence_diagnostics(
159 fit: &FregreLmResult,
160 data: &FdMatrix,
161 scalar_covariates: Option<&FdMatrix>,
162) -> Result<InfluenceDiagnostics, FdarError> {
163 let (n, m) = data.shape();
164 if n == 0 {
165 return Err(FdarError::InvalidDimension {
166 parameter: "data",
167 expected: ">0 rows".into(),
168 actual: "0".into(),
169 });
170 }
171 if m != fit.fpca.mean.len() {
172 return Err(FdarError::InvalidDimension {
173 parameter: "data",
174 expected: format!("{} columns", fit.fpca.mean.len()),
175 actual: format!("{m}"),
176 });
177 }
178 let ncomp = fit.ncomp;
179 let scores = project_scores(data, &fit.fpca.mean, &fit.fpca.rotation, ncomp);
180 let design = build_design_matrix(&scores, ncomp, scalar_covariates, n);
181 let p = design.ncols();
182
183 if n <= p {
184 return Err(FdarError::InvalidDimension {
185 parameter: "data",
186 expected: format!(">{p} rows (more than parameters)"),
187 actual: format!("{n}"),
188 });
189 }
190
191 let xtx = compute_xtx(&design);
192 let l = cholesky_factor(&xtx, p)?;
193 let leverage = compute_hat_diagonal(&design, &l);
194
195 let ss_res: f64 = fit.residuals.iter().map(|r| r * r).sum();
196 let mse = ss_res / (n - p) as f64;
197
198 let mut cooks_distance = vec![0.0; n];
199 for i in 0..n {
200 let e = fit.residuals[i];
201 let h = leverage[i];
202 let denom = p as f64 * mse * (1.0 - h).powi(2);
203 cooks_distance[i] = if denom > 0.0 { e * e * h / denom } else { 0.0 };
204 }
205
206 Ok(InfluenceDiagnostics {
207 leverage,
208 cooks_distance,
209 p,
210 mse,
211 })
212}
213
/// DFBETAS / DFFITS deletion diagnostics for a functional linear model.
#[derive(Debug, Clone, PartialEq)]
pub struct DfbetasDffitsResult {
    /// n x p matrix: standardized change in coefficient j when observation i
    /// is deleted.
    pub dfbetas: FdMatrix,
    /// Scaled change in the fitted value when observation i is deleted.
    pub dffits: Vec<f64>,
    /// Studentized residual per observation.
    pub studentized_residuals: Vec<f64>,
    /// Number of model parameters (design-matrix columns).
    pub p: usize,
    /// Screening cutoff for |DFBETAS|: 2 / sqrt(n).
    pub dfbetas_cutoff: f64,
    /// Screening cutoff for |DFFITS|: 2 * sqrt(p / n).
    pub dffits_cutoff: f64,
}
234
/// DFBETAS, DFFITS, and studentized residuals for a fitted functional
/// linear model.
///
/// Projects `data` onto the fitted FPCA basis, rebuilds the design matrix,
/// and derives per-observation deletion diagnostics from the hat diagonal
/// and the stored residuals — no model refitting is performed.
///
/// # Errors
///
/// Fails when `data` is empty, when its column count does not match the FPCA
/// mean length, when `n <= p`, when the residual standard error is near zero
/// (diagnostics would divide by ~0), or when the Cholesky factorization of
/// X'X fails.
#[must_use = "expensive computation whose result should not be discarded"]
pub fn dfbetas_dffits(
    fit: &FregreLmResult,
    data: &FdMatrix,
    scalar_covariates: Option<&FdMatrix>,
) -> Result<DfbetasDffitsResult, FdarError> {
    let (n, m) = data.shape();
    if n == 0 {
        return Err(FdarError::InvalidDimension {
            parameter: "data",
            expected: ">0 rows".into(),
            actual: "0".into(),
        });
    }
    if m != fit.fpca.mean.len() {
        return Err(FdarError::InvalidDimension {
            parameter: "data",
            expected: format!("{} columns", fit.fpca.mean.len()),
            actual: format!("{m}"),
        });
    }
    let ncomp = fit.ncomp;
    let scores = project_scores(data, &fit.fpca.mean, &fit.fpca.rotation, ncomp);
    let design = build_design_matrix(&scores, ncomp, scalar_covariates, n);
    let p = design.ncols();

    // Need positive residual degrees of freedom for the MSE below.
    if n <= p {
        return Err(FdarError::InvalidDimension {
            parameter: "data",
            expected: format!(">{p} rows (more than parameters)"),
            actual: format!("{n}"),
        });
    }

    let xtx = compute_xtx(&design);
    let l = cholesky_factor(&xtx, p)?;
    let hat_diag = compute_hat_diagonal(&design, &l);

    // Residual mean square and residual standard error.
    let ss_res: f64 = fit.residuals.iter().map(|r| r * r).sum();
    let mse = ss_res / (n - p) as f64;
    let s = mse.sqrt();

    // All influence measures divide by s; bail out on a degenerate
    // (essentially perfect) fit instead of producing infinities.
    if s < 1e-15 {
        return Err(FdarError::ComputationFailed {
            operation: "dfbetas_dffits",
            detail: "residual standard error is near zero".into(),
        });
    }

    let se = compute_coefficient_se(&l, mse, p);

    let mut studentized_residuals = vec![0.0; n];
    let mut dffits = vec![0.0; n];
    let mut dfbetas = FdMatrix::zeros(n, p);

    for i in 0..n {
        let (t_i, dffits_i, dfb) =
            compute_obs_influence(&design, &l, fit.residuals[i], hat_diag[i], s, &se, p, i);
        studentized_residuals[i] = t_i;
        dffits[i] = dffits_i;
        for j in 0..p {
            dfbetas[(i, j)] = dfb[j];
        }
    }

    // Conventional screening cutoffs: |DFBETAS| > 2/sqrt(n), |DFFITS| > 2*sqrt(p/n).
    let dfbetas_cutoff = 2.0 / (n as f64).sqrt();
    let dffits_cutoff = 2.0 * (p as f64 / n as f64).sqrt();

    Ok(DfbetasDffitsResult {
        dfbetas,
        dffits,
        studentized_residuals,
        p,
        dfbetas_cutoff,
        dffits_cutoff,
    })
}
324
325fn compute_coefficient_se(l: &[f64], mse: f64, p: usize) -> Vec<f64> {
327 let mut se = vec![0.0; p];
328 for j in 0..p {
329 let mut ej = vec![0.0; p];
330 ej[j] = 1.0;
331 let v = cholesky_forward_back(l, &ej, p);
332 se[j] = (mse * v[j].max(0.0)).sqrt();
333 }
334 se
335}
336
337fn compute_obs_influence(
339 design: &FdMatrix,
340 l: &[f64],
341 residual: f64,
342 h_ii: f64,
343 s: f64,
344 se: &[f64],
345 p: usize,
346 i: usize,
347) -> (f64, f64, Vec<f64>) {
348 let one_minus_h = (1.0 - h_ii).max(1e-15);
349 let t_i = residual / (s * one_minus_h.sqrt());
350 let dffits_i = t_i * (h_ii / one_minus_h).sqrt();
351
352 let mut xi = vec![0.0; p];
353 for j in 0..p {
354 xi[j] = design[(i, j)];
355 }
356 let inv_xtx_xi = cholesky_forward_back(l, &xi, p);
357 let mut dfb = vec![0.0; p];
358 for j in 0..p {
359 if se[j] > 1e-15 {
360 dfb[j] = inv_xtx_xi[j] * residual / (one_minus_h * se[j]);
361 }
362 }
363
364 (t_i, dffits_i, dfb)
365}
366
/// Point predictions with t-based prediction intervals for new observations.
#[derive(Debug, Clone, PartialEq)]
pub struct PredictionIntervalResult {
    /// Predicted response per new observation.
    pub predictions: Vec<f64>,
    /// Lower bound of the prediction interval per observation.
    pub lower: Vec<f64>,
    /// Upper bound of the prediction interval per observation.
    pub upper: Vec<f64>,
    /// Standard error of each prediction (includes the 1 + h term).
    pub prediction_se: Vec<f64>,
    /// Confidence level used, in (0, 1).
    pub confidence_level: f64,
    /// Two-sided t critical value at `confidence_level` and n - p df.
    pub t_critical: f64,
    /// Residual standard error taken from the fitted model.
    pub residual_se: f64,
}
389
/// Point predictions and t-based prediction intervals for `new_data` from a
/// fitted functional linear model.
///
/// The training design matrix is rebuilt (from `train_data` and
/// `train_scalar`) so that `(X'X)^{-1}` is available for the per-observation
/// leverage term `h_new = x'(X'X)^{-1}x`; the interval half-width is
/// `t_crit * residual_se * sqrt(1 + h_new)`.
///
/// # Errors
///
/// Fails when `confidence_level` is outside (0, 1), when `train_data` or
/// `new_data` is empty, when either matrix's column count does not match the
/// FPCA mean length, when `n_train <= p`, or when the Cholesky factorization
/// of X'X fails.
#[must_use = "prediction result should not be discarded"]
pub fn prediction_intervals(
    fit: &FregreLmResult,
    train_data: &FdMatrix,
    train_scalar: Option<&FdMatrix>,
    new_data: &FdMatrix,
    new_scalar: Option<&FdMatrix>,
    confidence_level: f64,
) -> Result<PredictionIntervalResult, FdarError> {
    let (n_train, m) = train_data.shape();
    let (n_new, m_new) = new_data.shape();
    if confidence_level <= 0.0 || confidence_level >= 1.0 {
        return Err(FdarError::InvalidParameter {
            parameter: "confidence_level",
            message: "must be in (0, 1)".into(),
        });
    }
    if n_train == 0 {
        return Err(FdarError::InvalidDimension {
            parameter: "train_data",
            expected: ">0 rows".into(),
            actual: "0".into(),
        });
    }
    if m != fit.fpca.mean.len() {
        return Err(FdarError::InvalidDimension {
            parameter: "train_data",
            expected: format!("{} columns", fit.fpca.mean.len()),
            actual: format!("{m}"),
        });
    }
    if n_new == 0 {
        return Err(FdarError::InvalidDimension {
            parameter: "new_data",
            expected: ">0 rows".into(),
            actual: "0".into(),
        });
    }
    if m_new != m {
        return Err(FdarError::InvalidDimension {
            parameter: "new_data",
            expected: format!("{m} columns (matching train)"),
            actual: format!("{m_new}"),
        });
    }
    let ncomp = fit.ncomp;

    // Rebuild the training design so (X'X)^{-1} is available for leverage.
    let train_scores = project_scores(train_data, &fit.fpca.mean, &fit.fpca.rotation, ncomp);
    let train_design = build_design_matrix(&train_scores, ncomp, train_scalar, n_train);
    let p = train_design.ncols();
    if n_train <= p {
        return Err(FdarError::InvalidDimension {
            parameter: "train_data",
            expected: format!(">{p} rows (more than parameters)"),
            actual: format!("{n_train}"),
        });
    }

    let xtx = compute_xtx(&train_design);
    let l = cholesky_factor(&xtx, p)?;

    let residual_se = fit.residual_se;
    let df = n_train - p;
    let t_crit = t_critical_value(confidence_level, df);

    // Project the new observations onto the same FPCA basis.
    let new_scores = project_scores(new_data, &fit.fpca.mean, &fit.fpca.rotation, ncomp);

    let mut predictions = vec![0.0; n_new];
    let mut lower = vec![0.0; n_new];
    let mut upper = vec![0.0; n_new];
    let mut prediction_se = vec![0.0; n_new];

    // Number of scalar covariate coefficients in the fitted model.
    let p_scalar = fit.gamma.len();

    for i in 0..n_new {
        let x_new = build_design_vector(&new_scores, new_scalar, i, ncomp, p_scalar, p);
        let (yhat, lo, up, pse) =
            compute_prediction_interval_obs(&l, &fit.coefficients, &x_new, p, residual_se, t_crit);
        predictions[i] = yhat;
        lower[i] = lo;
        upper[i] = up;
        prediction_se[i] = pse;
    }

    Ok(PredictionIntervalResult {
        predictions,
        lower,
        upper,
        prediction_se,
        confidence_level,
        t_critical: t_crit,
        residual_se,
    })
}
497
/// Approximate standard-normal quantile via the Abramowitz & Stegun 26.2.23
/// rational approximation (absolute error < 4.5e-4).
///
/// Out-of-range `p` (<= 0 or >= 1) returns 0.0; callers are expected to
/// validate the probability beforehand.
fn normal_quantile(p: f64) -> f64 {
    if p <= 0.0 || p >= 1.0 {
        return 0.0;
    }
    // Evaluate in whichever tail is nearer and mirror the sign at the end.
    let lower_tail = p < 0.5;
    let tail = if lower_tail { p } else { 1.0 - p };
    let t = (-2.0 * tail.ln()).sqrt();
    let (c0, c1, c2) = (2.515517, 0.802853, 0.010328);
    let (d1, d2, d3) = (1.432788, 0.189269, 0.001308);
    let val = t - (c0 + c1 * t + c2 * t * t) / (1.0 + d1 * t + d2 * t * t + d3 * t * t * t);
    if lower_tail { -val } else { val }
}
521
522fn t_critical_value(conf: f64, df: usize) -> f64 {
524 let alpha = 1.0 - conf;
525 let z = normal_quantile(1.0 - alpha / 2.0);
526 if df == 0 {
527 return z;
528 }
529 let df_f = df as f64;
530 let g1 = (z.powi(3) + z) / (4.0 * df_f);
531 let g2 = (5.0 * z.powi(5) + 16.0 * z.powi(3) + 3.0 * z) / (96.0 * df_f * df_f);
532 let g3 = (3.0 * z.powi(7) + 19.0 * z.powi(5) + 17.0 * z.powi(3) - 15.0 * z)
533 / (384.0 * df_f * df_f * df_f);
534 z + g1 + g2 + g3
535}
536
537fn build_design_vector(
539 new_scores: &FdMatrix,
540 new_scalar: Option<&FdMatrix>,
541 i: usize,
542 ncomp: usize,
543 p_scalar: usize,
544 p: usize,
545) -> Vec<f64> {
546 let mut x = vec![0.0; p];
547 x[0] = 1.0;
548 for k in 0..ncomp {
549 x[1 + k] = new_scores[(i, k)];
550 }
551 if let Some(ns) = new_scalar {
552 for j in 0..p_scalar {
553 x[1 + ncomp + j] = ns[(i, j)];
554 }
555 }
556 x
557}
558
559fn compute_prediction_interval_obs(
561 l: &[f64],
562 coefficients: &[f64],
563 x_new: &[f64],
564 p: usize,
565 residual_se: f64,
566 t_crit: f64,
567) -> (f64, f64, f64, f64) {
568 let yhat: f64 = x_new.iter().zip(coefficients).map(|(a, b)| a * b).sum();
569 let v = cholesky_forward_back(l, x_new, p);
570 let h_new: f64 = x_new.iter().zip(&v).map(|(a, b)| a * b).sum();
571 let pred_se = residual_se * (1.0 + h_new).sqrt();
572 (
573 yhat,
574 yhat - t_crit * pred_se,
575 yhat + t_crit * pred_se,
576 pred_se,
577 )
578}
579
580#[derive(Debug, Clone, PartialEq)]
586pub struct LooCvResult {
587 pub loo_residuals: Vec<f64>,
589 pub press: f64,
591 pub loo_r_squared: f64,
593 pub leverage: Vec<f64>,
595 pub tss: f64,
597}
598
599#[must_use = "expensive computation whose result should not be discarded"]
611pub fn loo_cv_press(
612 fit: &FregreLmResult,
613 data: &FdMatrix,
614 y: &[f64],
615 scalar_covariates: Option<&FdMatrix>,
616) -> Result<LooCvResult, FdarError> {
617 let (n, m) = data.shape();
618 if n == 0 {
619 return Err(FdarError::InvalidDimension {
620 parameter: "data",
621 expected: ">0 rows".into(),
622 actual: "0".into(),
623 });
624 }
625 if n != y.len() {
626 return Err(FdarError::InvalidDimension {
627 parameter: "y",
628 expected: format!("{n} (matching data rows)"),
629 actual: format!("{}", y.len()),
630 });
631 }
632 if m != fit.fpca.mean.len() {
633 return Err(FdarError::InvalidDimension {
634 parameter: "data",
635 expected: format!("{} columns", fit.fpca.mean.len()),
636 actual: format!("{m}"),
637 });
638 }
639 let ncomp = fit.ncomp;
640 let scores = project_scores(data, &fit.fpca.mean, &fit.fpca.rotation, ncomp);
641 let design = build_design_matrix(&scores, ncomp, scalar_covariates, n);
642 let p = design.ncols();
643 if n <= p {
644 return Err(FdarError::InvalidDimension {
645 parameter: "data",
646 expected: format!(">{p} rows (more than parameters)"),
647 actual: format!("{n}"),
648 });
649 }
650
651 let xtx = compute_xtx(&design);
652 let l = cholesky_factor(&xtx, p)?;
653 let leverage = compute_hat_diagonal(&design, &l);
654
655 let y_mean: f64 = y.iter().sum::<f64>() / n as f64;
656 let tss: f64 = y.iter().map(|&yi| (yi - y_mean).powi(2)).sum();
657 if tss == 0.0 {
658 return Err(FdarError::ComputationFailed {
659 operation: "loo_cv_press",
660 detail: "total sum of squares is zero".into(),
661 });
662 }
663
664 let mut loo_residuals = vec![0.0; n];
665 let mut press = 0.0;
666 for i in 0..n {
667 let denom = (1.0 - leverage[i]).max(1e-15);
668 loo_residuals[i] = fit.residuals[i] / denom;
669 press += loo_residuals[i] * loo_residuals[i];
670 }
671
672 let loo_r_squared = 1.0 - press / tss;
673
674 Ok(LooCvResult {
675 loo_residuals,
676 press,
677 loo_r_squared,
678 leverage,
679 tss,
680 })
681}