1use super::helpers::*;
4use crate::matrix::FdMatrix;
5use crate::scalar_on_function::{
6 build_design_matrix, cholesky_factor, cholesky_forward_back, compute_hat_diagonal, compute_xtx,
7 FregreLmResult, FunctionalLogisticResult,
8};
9
/// Variance inflation factors for the predictors of an FPC regression design.
pub struct VifResult {
    /// VIF per predictor: FPC-score columns first, then scalar covariates.
    pub vif: Vec<f64>,
    /// Predictor labels parallel to `vif` ("FPC_k" then "scalar_j", 0-indexed).
    pub labels: Vec<String>,
    /// Arithmetic mean of all VIF values.
    pub mean_vif: f64,
    /// Count of predictors with VIF > 5 (note: includes the severe ones too).
    pub n_moderate: usize,
    /// Count of predictors with VIF > 10.
    pub n_severe: usize,
}
27
28pub fn fpc_vif(
32 fit: &FregreLmResult,
33 data: &FdMatrix,
34 scalar_covariates: Option<&FdMatrix>,
35) -> Option<VifResult> {
36 let (n, m) = data.shape();
37 if n == 0 || m != fit.fpca.mean.len() {
38 return None;
39 }
40 let ncomp = fit.ncomp;
41 let scores = project_scores(data, &fit.fpca.mean, &fit.fpca.rotation, ncomp);
42 compute_vif_from_scores(&scores, ncomp, scalar_covariates, n)
43}
44
45pub fn fpc_vif_logistic(
47 fit: &FunctionalLogisticResult,
48 data: &FdMatrix,
49 scalar_covariates: Option<&FdMatrix>,
50) -> Option<VifResult> {
51 let (n, m) = data.shape();
52 if n == 0 || m != fit.fpca.mean.len() {
53 return None;
54 }
55 let ncomp = fit.ncomp;
56 let scores = project_scores(data, &fit.fpca.mean, &fit.fpca.rotation, ncomp);
57 compute_vif_from_scores(&scores, ncomp, scalar_covariates, n)
58}
59
60pub(crate) fn compute_vif_from_scores(
61 scores: &FdMatrix,
62 ncomp: usize,
63 scalar_covariates: Option<&FdMatrix>,
64 n: usize,
65) -> Option<VifResult> {
66 let p_scalar = scalar_covariates.map_or(0, |sc| sc.ncols());
67 let p = ncomp + p_scalar;
68 if p == 0 || n <= p {
69 return None;
70 }
71
72 let x_noi = build_no_intercept_matrix(scores, ncomp, scalar_covariates, n);
73 let xtx = compute_xtx(&x_noi);
74 let l = cholesky_factor(&xtx, p)?;
75
76 let mut vif = vec![0.0; p];
77 for k in 0..p {
78 let mut ek = vec![0.0; p];
79 ek[k] = 1.0;
80 let v = cholesky_forward_back(&l, &ek, p);
81 vif[k] = v[k] * xtx[k * p + k];
82 }
83
84 let mut labels = Vec::with_capacity(p);
85 for k in 0..ncomp {
86 labels.push(format!("FPC_{}", k));
87 }
88 for j in 0..p_scalar {
89 labels.push(format!("scalar_{}", j));
90 }
91
92 let mean_vif = vif.iter().sum::<f64>() / p as f64;
93 let n_moderate = vif.iter().filter(|&&v| v > 5.0).count();
94 let n_severe = vif.iter().filter(|&&v| v > 10.0).count();
95
96 Some(VifResult {
97 vif,
98 labels,
99 mean_vif,
100 n_moderate,
101 n_severe,
102 })
103}
104
105fn build_no_intercept_matrix(
107 scores: &FdMatrix,
108 ncomp: usize,
109 scalar_covariates: Option<&FdMatrix>,
110 n: usize,
111) -> FdMatrix {
112 let p_scalar = scalar_covariates.map_or(0, |sc| sc.ncols());
113 let p = ncomp + p_scalar;
114 let mut x = FdMatrix::zeros(n, p);
115 for i in 0..n {
116 for k in 0..ncomp {
117 x[(i, k)] = scores[(i, k)];
118 }
119 if let Some(sc) = scalar_covariates {
120 for j in 0..p_scalar {
121 x[(i, ncomp + j)] = sc[(i, j)];
122 }
123 }
124 }
125 x
126}
127
/// Per-observation influence measures for a fitted FPC linear model.
pub struct InfluenceDiagnostics {
    /// Hat-matrix diagonal h_i for each observation.
    pub leverage: Vec<f64>,
    /// Cook's distance for each observation.
    pub cooks_distance: Vec<f64>,
    /// Number of design columns (intercept + FPC scores + scalar covariates).
    pub p: usize,
    /// Residual mean square, SSE / (n - p).
    pub mse: f64,
}
143
144pub fn influence_diagnostics(
146 fit: &FregreLmResult,
147 data: &FdMatrix,
148 scalar_covariates: Option<&FdMatrix>,
149) -> Option<InfluenceDiagnostics> {
150 let (n, m) = data.shape();
151 if n == 0 || m != fit.fpca.mean.len() {
152 return None;
153 }
154 let ncomp = fit.ncomp;
155 let scores = project_scores(data, &fit.fpca.mean, &fit.fpca.rotation, ncomp);
156 let design = build_design_matrix(&scores, ncomp, scalar_covariates, n);
157 let p = design.ncols();
158
159 if n <= p {
160 return None;
161 }
162
163 let xtx = compute_xtx(&design);
164 let l = cholesky_factor(&xtx, p)?;
165 let leverage = compute_hat_diagonal(&design, &l);
166
167 let ss_res: f64 = fit.residuals.iter().map(|r| r * r).sum();
168 let mse = ss_res / (n - p) as f64;
169
170 let mut cooks_distance = vec![0.0; n];
171 for i in 0..n {
172 let e = fit.residuals[i];
173 let h = leverage[i];
174 let denom = p as f64 * mse * (1.0 - h).powi(2);
175 cooks_distance[i] = if denom > 0.0 { e * e * h / denom } else { 0.0 };
176 }
177
178 Some(InfluenceDiagnostics {
179 leverage,
180 cooks_distance,
181 p,
182 mse,
183 })
184}
185
/// Deletion diagnostics for a fitted FPC linear model.
pub struct DfbetasDffitsResult {
    /// n × p matrix of DFBETAS (standardized per-coefficient changes).
    pub dfbetas: FdMatrix,
    /// DFFITS value per observation.
    pub dffits: Vec<f64>,
    /// Internally studentized residuals (scaled by the overall residual s).
    pub studentized_residuals: Vec<f64>,
    /// Number of design columns.
    pub p: usize,
    /// Conventional flagging threshold 2/sqrt(n) for |DFBETAS|.
    pub dfbetas_cutoff: f64,
    /// Conventional flagging threshold 2*sqrt(p/n) for |DFFITS|.
    pub dffits_cutoff: f64,
}
205
206pub fn dfbetas_dffits(
211 fit: &FregreLmResult,
212 data: &FdMatrix,
213 scalar_covariates: Option<&FdMatrix>,
214) -> Option<DfbetasDffitsResult> {
215 let (n, m) = data.shape();
216 if n == 0 || m != fit.fpca.mean.len() {
217 return None;
218 }
219 let ncomp = fit.ncomp;
220 let scores = project_scores(data, &fit.fpca.mean, &fit.fpca.rotation, ncomp);
221 let design = build_design_matrix(&scores, ncomp, scalar_covariates, n);
222 let p = design.ncols();
223
224 if n <= p {
225 return None;
226 }
227
228 let xtx = compute_xtx(&design);
229 let l = cholesky_factor(&xtx, p)?;
230 let hat_diag = compute_hat_diagonal(&design, &l);
231
232 let ss_res: f64 = fit.residuals.iter().map(|r| r * r).sum();
233 let mse = ss_res / (n - p) as f64;
234 let s = mse.sqrt();
235
236 if s < 1e-15 {
237 return None;
238 }
239
240 let se = compute_coefficient_se(&l, mse, p);
241
242 let mut studentized_residuals = vec![0.0; n];
243 let mut dffits = vec![0.0; n];
244 let mut dfbetas = FdMatrix::zeros(n, p);
245
246 for i in 0..n {
247 let (t_i, dffits_i, dfb) =
248 compute_obs_influence(&design, &l, fit.residuals[i], hat_diag[i], s, &se, p, i);
249 studentized_residuals[i] = t_i;
250 dffits[i] = dffits_i;
251 for j in 0..p {
252 dfbetas[(i, j)] = dfb[j];
253 }
254 }
255
256 let dfbetas_cutoff = 2.0 / (n as f64).sqrt();
257 let dffits_cutoff = 2.0 * (p as f64 / n as f64).sqrt();
258
259 Some(DfbetasDffitsResult {
260 dfbetas,
261 dffits,
262 studentized_residuals,
263 p,
264 dfbetas_cutoff,
265 dffits_cutoff,
266 })
267}
268
269fn compute_coefficient_se(l: &[f64], mse: f64, p: usize) -> Vec<f64> {
271 let mut se = vec![0.0; p];
272 for j in 0..p {
273 let mut ej = vec![0.0; p];
274 ej[j] = 1.0;
275 let v = cholesky_forward_back(l, &ej, p);
276 se[j] = (mse * v[j].max(0.0)).sqrt();
277 }
278 se
279}
280
281fn compute_obs_influence(
283 design: &FdMatrix,
284 l: &[f64],
285 residual: f64,
286 h_ii: f64,
287 s: f64,
288 se: &[f64],
289 p: usize,
290 i: usize,
291) -> (f64, f64, Vec<f64>) {
292 let one_minus_h = (1.0 - h_ii).max(1e-15);
293 let t_i = residual / (s * one_minus_h.sqrt());
294 let dffits_i = t_i * (h_ii / one_minus_h).sqrt();
295
296 let mut xi = vec![0.0; p];
297 for j in 0..p {
298 xi[j] = design[(i, j)];
299 }
300 let inv_xtx_xi = cholesky_forward_back(l, &xi, p);
301 let mut dfb = vec![0.0; p];
302 for j in 0..p {
303 if se[j] > 1e-15 {
304 dfb[j] = inv_xtx_xi[j] * residual / (one_minus_h * se[j]);
305 }
306 }
307
308 (t_i, dffits_i, dfb)
309}
310
/// Point predictions with prediction intervals for new observations.
pub struct PredictionIntervalResult {
    /// Point prediction ŷ per new observation.
    pub predictions: Vec<f64>,
    /// Lower interval bound per observation, parallel to `predictions`.
    pub lower: Vec<f64>,
    /// Upper interval bound per observation, parallel to `predictions`.
    pub upper: Vec<f64>,
    /// Prediction standard error per observation.
    pub prediction_se: Vec<f64>,
    /// Confidence level used, strictly in (0, 1).
    pub confidence_level: f64,
    /// Two-sided t critical value applied to build the intervals.
    pub t_critical: f64,
    /// Residual standard error taken from the fit.
    pub residual_se: f64,
}
332
/// Point predictions with (1 - α) prediction intervals for new curves.
///
/// Refits nothing: reuses the FPCA basis and coefficients stored in `fit`,
/// rebuilding the training design only to factor XᵀX for the hat values of
/// the new rows. Interval: ŷ ± t_{df,1-α/2} · s · sqrt(1 + x'(XᵀX)⁻¹x).
///
/// Returns `None` when `confidence_level` is outside (0, 1), either data
/// set is empty, a grid length disagrees with the FPCA mean, or the
/// training sample is too small (`n_train <= p`).
pub fn prediction_intervals(
    fit: &FregreLmResult,
    train_data: &FdMatrix,
    train_scalar: Option<&FdMatrix>,
    new_data: &FdMatrix,
    new_scalar: Option<&FdMatrix>,
    confidence_level: f64,
) -> Option<PredictionIntervalResult> {
    let (n_train, m) = train_data.shape();
    let (n_new, m_new) = new_data.shape();
    if confidence_level <= 0.0
        || confidence_level >= 1.0
        || n_train == 0
        || m != fit.fpca.mean.len()
        || n_new == 0
        || m_new != m
    {
        return None;
    }
    let ncomp = fit.ncomp;

    // Rebuild the training design to factor XᵀX (needed for hat values).
    let train_scores = project_scores(train_data, &fit.fpca.mean, &fit.fpca.rotation, ncomp);
    let train_design = build_design_matrix(&train_scores, ncomp, train_scalar, n_train);
    let p = train_design.ncols();
    if n_train <= p {
        return None;
    }

    let xtx = compute_xtx(&train_design);
    let l = cholesky_factor(&xtx, p)?;

    let residual_se = fit.residual_se;
    let df = n_train - p;
    let t_crit = t_critical_value(confidence_level, df);

    let new_scores = project_scores(new_data, &fit.fpca.mean, &fit.fpca.rotation, ncomp);

    let mut predictions = vec![0.0; n_new];
    let mut lower = vec![0.0; n_new];
    let mut upper = vec![0.0; n_new];
    let mut prediction_se = vec![0.0; n_new];

    // NOTE(review): assumes `fit.gamma` holds exactly the scalar-covariate
    // coefficients, so its length is p_scalar and matches the design layout
    // [intercept | FPC scores | scalars] — confirm against the fitting code.
    let p_scalar = fit.gamma.len();

    for i in 0..n_new {
        let x_new = build_design_vector(&new_scores, new_scalar, i, ncomp, p_scalar, p);
        let (yhat, lo, up, pse) =
            compute_prediction_interval_obs(&l, &fit.coefficients, &x_new, p, residual_se, t_crit);
        predictions[i] = yhat;
        lower[i] = lo;
        upper[i] = up;
        prediction_se[i] = pse;
    }

    Some(PredictionIntervalResult {
        predictions,
        lower,
        upper,
        prediction_se,
        confidence_level,
        t_critical: t_crit,
        residual_se,
    })
}
402
/// Standard-normal quantile via the Abramowitz & Stegun 26.2.23 rational
/// approximation (absolute error below ~4.5e-4).
///
/// Out-of-range probabilities (p ≤ 0 or p ≥ 1) return 0.0 as a sentinel.
fn normal_quantile(p: f64) -> f64 {
    if p <= 0.0 || p >= 1.0 {
        return 0.0;
    }
    // Work in the lower tail and restore the sign at the end.
    let tail = p.min(1.0 - p);
    let t = (-2.0 * tail.ln()).sqrt();
    const C: [f64; 3] = [2.515517, 0.802853, 0.010328];
    const D: [f64; 3] = [1.432788, 0.189269, 0.001308];
    let val =
        t - (C[0] + C[1] * t + C[2] * t * t) / (1.0 + D[0] * t + D[1] * t * t + D[2] * t * t * t);
    if p < 0.5 {
        -val
    } else {
        val
    }
}
426
427fn t_critical_value(conf: f64, df: usize) -> f64 {
429 let alpha = 1.0 - conf;
430 let z = normal_quantile(1.0 - alpha / 2.0);
431 if df == 0 {
432 return z;
433 }
434 let df_f = df as f64;
435 let g1 = (z.powi(3) + z) / (4.0 * df_f);
436 let g2 = (5.0 * z.powi(5) + 16.0 * z.powi(3) + 3.0 * z) / (96.0 * df_f * df_f);
437 let g3 = (3.0 * z.powi(7) + 19.0 * z.powi(5) + 17.0 * z.powi(3) - 15.0 * z)
438 / (384.0 * df_f * df_f * df_f);
439 z + g1 + g2 + g3
440}
441
442fn build_design_vector(
444 new_scores: &FdMatrix,
445 new_scalar: Option<&FdMatrix>,
446 i: usize,
447 ncomp: usize,
448 p_scalar: usize,
449 p: usize,
450) -> Vec<f64> {
451 let mut x = vec![0.0; p];
452 x[0] = 1.0;
453 for k in 0..ncomp {
454 x[1 + k] = new_scores[(i, k)];
455 }
456 if let Some(ns) = new_scalar {
457 for j in 0..p_scalar {
458 x[1 + ncomp + j] = ns[(i, j)];
459 }
460 }
461 x
462}
463
464fn compute_prediction_interval_obs(
466 l: &[f64],
467 coefficients: &[f64],
468 x_new: &[f64],
469 p: usize,
470 residual_se: f64,
471 t_crit: f64,
472) -> (f64, f64, f64, f64) {
473 let yhat: f64 = x_new.iter().zip(coefficients).map(|(a, b)| a * b).sum();
474 let v = cholesky_forward_back(l, x_new, p);
475 let h_new: f64 = x_new.iter().zip(&v).map(|(a, b)| a * b).sum();
476 let pred_se = residual_se * (1.0 + h_new).sqrt();
477 (
478 yhat,
479 yhat - t_crit * pred_se,
480 yhat + t_crit * pred_se,
481 pred_se,
482 )
483}
484
/// Leave-one-out cross-validation summary computed via the PRESS shortcut.
pub struct LooCvResult {
    /// LOO residual e_i / (1 - h_i) per observation.
    pub loo_residuals: Vec<f64>,
    /// PRESS statistic: sum of squared LOO residuals.
    pub press: f64,
    /// Predictive R²: 1 - PRESS / TSS.
    pub loo_r_squared: f64,
    /// Hat-matrix diagonal per observation.
    pub leverage: Vec<f64>,
    /// Total sum of squares of the response around its mean.
    pub tss: f64,
}
502
503pub fn loo_cv_press(
507 fit: &FregreLmResult,
508 data: &FdMatrix,
509 y: &[f64],
510 scalar_covariates: Option<&FdMatrix>,
511) -> Option<LooCvResult> {
512 let (n, m) = data.shape();
513 if n == 0 || n != y.len() || m != fit.fpca.mean.len() {
514 return None;
515 }
516 let ncomp = fit.ncomp;
517 let scores = project_scores(data, &fit.fpca.mean, &fit.fpca.rotation, ncomp);
518 let design = build_design_matrix(&scores, ncomp, scalar_covariates, n);
519 let p = design.ncols();
520 if n <= p {
521 return None;
522 }
523
524 let xtx = compute_xtx(&design);
525 let l = cholesky_factor(&xtx, p)?;
526 let leverage = compute_hat_diagonal(&design, &l);
527
528 let y_mean: f64 = y.iter().sum::<f64>() / n as f64;
529 let tss: f64 = y.iter().map(|&yi| (yi - y_mean).powi(2)).sum();
530 if tss == 0.0 {
531 return None;
532 }
533
534 let mut loo_residuals = vec![0.0; n];
535 let mut press = 0.0;
536 for i in 0..n {
537 let denom = (1.0 - leverage[i]).max(1e-15);
538 loo_residuals[i] = fit.residuals[i] / denom;
539 press += loo_residuals[i] * loo_residuals[i];
540 }
541
542 let loo_r_squared = 1.0 - press / tss;
543
544 Some(LooCvResult {
545 loo_residuals,
546 press,
547 loo_r_squared,
548 leverage,
549 tss,
550 })
551}