1use crate::cv::create_folds;
18use crate::iter_maybe_parallel;
19use crate::matrix::FdMatrix;
20use crate::regression::{fdata_to_pc_1d, FpcaResult};
21use rand::prelude::*;
22#[cfg(feature = "parallel")]
23use rayon::iter::ParallelIterator;
24
/// Result of a functional linear model fitted by OLS on FPCA scores
/// (`fregre_lm`).
pub struct FregreLmResult {
    /// Estimated intercept.
    pub intercept: f64,
    /// Functional coefficient beta(t) evaluated on the data grid.
    pub beta_t: Vec<f64>,
    /// Pointwise standard errors of `beta_t`.
    pub beta_se: Vec<f64>,
    /// Coefficients of the scalar covariates (empty when none were given).
    pub gamma: Vec<f64>,
    /// In-sample fitted values.
    pub fitted_values: Vec<f64>,
    /// y - fitted_values.
    pub residuals: Vec<f64>,
    /// Coefficient of determination.
    pub r_squared: f64,
    /// R^2 adjusted for model degrees of freedom.
    pub r_squared_adj: f64,
    /// Standard errors of all design coefficients (intercept first).
    pub std_errors: Vec<f64>,
    /// Number of FPCA components used.
    pub ncomp: usize,
    /// The FPCA decomposition the model was fitted on.
    pub fpca: FpcaResult,
    /// All coefficients on the design basis: [intercept, FPC scores, scalars].
    pub coefficients: Vec<f64>,
    /// Residual standard error sqrt(SSR / df_resid).
    pub residual_se: f64,
    /// Generalized cross-validation score.
    pub gcv: f64,
}
60
/// Result of the nonparametric Nadaraya-Watson regression (`fregre_np_mixed`).
pub struct FregreNpResult {
    /// Leave-one-out fitted values for the training responses.
    pub fitted_values: Vec<f64>,
    /// y - fitted_values.
    pub residuals: Vec<f64>,
    /// Coefficient of determination of the leave-one-out fit.
    pub r_squared: f64,
    /// Bandwidth used for the functional kernel (auto-selected when the
    /// input bandwidth was <= 0).
    pub h_func: f64,
    /// Bandwidth used for the scalar-covariate kernel.
    pub h_scalar: f64,
    /// Mean squared leave-one-out prediction error.
    pub cv_error: f64,
}
76
/// Result of functional logistic regression on FPCA scores
/// (`functional_logistic`).
pub struct FunctionalLogisticResult {
    /// Estimated intercept on the logit scale.
    pub intercept: f64,
    /// Functional coefficient beta(t) on the data grid (logit scale).
    pub beta_t: Vec<f64>,
    /// Pointwise standard errors of `beta_t`.
    pub beta_se: Vec<f64>,
    /// Coefficients of the scalar covariates (empty when none were given).
    pub gamma: Vec<f64>,
    /// Fitted probabilities P(y = 1) for the training rows.
    pub probabilities: Vec<f64>,
    /// 0/1 predictions at the 0.5 probability threshold.
    pub predicted_classes: Vec<u8>,
    /// Number of FPCA components used.
    pub ncomp: usize,
    /// In-sample classification accuracy.
    pub accuracy: f64,
    /// Wald standard errors of all design coefficients (intercept first).
    pub std_errors: Vec<f64>,
    /// All coefficients on the design basis: [intercept, FPC scores, scalars].
    pub coefficients: Vec<f64>,
    /// Bernoulli log-likelihood at the fitted probabilities.
    pub log_likelihood: f64,
    /// Number of IRLS iterations performed.
    pub iterations: usize,
    /// The FPCA decomposition the model was fitted on.
    pub fpca: FpcaResult,
}
106
/// Cross-validation summary over candidate FPCA component counts (`fregre_cv`).
pub struct FregreCvResult {
    /// Component counts that were successfully evaluated.
    pub k_values: Vec<usize>,
    /// CV mean squared error for each entry of `k_values`.
    pub cv_errors: Vec<f64>,
    /// Component count with the smallest CV error.
    pub optimal_k: usize,
    /// CV error at `optimal_k`.
    pub min_cv_error: f64,
}
118
119pub(crate) fn compute_xtx(x: &FdMatrix) -> Vec<f64> {
125 let (n, p) = x.shape();
126 let mut xtx = vec![0.0; p * p];
127 for k in 0..p {
128 for j in k..p {
129 let mut s = 0.0;
130 for i in 0..n {
131 s += x[(i, k)] * x[(i, j)];
132 }
133 xtx[k * p + j] = s;
134 xtx[j * p + k] = s;
135 }
136 }
137 xtx
138}
139
140fn compute_xty(x: &FdMatrix, y: &[f64]) -> Vec<f64> {
142 let (n, p) = x.shape();
143 (0..p)
144 .map(|k| {
145 let mut s = 0.0;
146 for i in 0..n {
147 s += x[(i, k)] * y[i];
148 }
149 s
150 })
151 .collect()
152}
153
/// Lower-triangular Cholesky factor L of the symmetric row-major matrix `a`
/// (p x p), so that a = L L'. Returns `None` when a pivot drops to <= 1e-12,
/// i.e. the matrix is not numerically positive definite.
pub(crate) fn cholesky_factor(a: &[f64], p: usize) -> Option<Vec<f64>> {
    let mut l = vec![0.0; p * p];
    for col in 0..p {
        let pivot = a[col * p + col]
            - (0..col).map(|k| l[col * p + k] * l[col * p + k]).sum::<f64>();
        if pivot <= 1e-12 {
            return None;
        }
        let root = pivot.sqrt();
        l[col * p + col] = root;
        for row in (col + 1)..p {
            let off = a[row * p + col]
                - (0..col).map(|k| l[row * p + k] * l[col * p + k]).sum::<f64>();
            l[row * p + col] = off / root;
        }
    }
    Some(l)
}
176
/// Solves (L L') x = b given the lower-triangular Cholesky factor L:
/// forward substitution for L z = b, then back substitution for L' x = z.
pub(crate) fn cholesky_forward_back(l: &[f64], b: &[f64], p: usize) -> Vec<f64> {
    let mut x = b.to_vec();
    for row in 0..p {
        let partial: f64 = (0..row).map(|k| l[row * p + k] * x[k]).sum();
        x[row] = (x[row] - partial) / l[row * p + row];
    }
    for row in (0..p).rev() {
        let partial: f64 = ((row + 1)..p).map(|k| l[k * p + row] * x[k]).sum();
        x[row] = (x[row] - partial) / l[row * p + row];
    }
    x
}
194
195fn cholesky_solve(a: &[f64], b: &[f64], p: usize) -> Option<Vec<f64>> {
197 let l = cholesky_factor(a, p)?;
198 Some(cholesky_forward_back(&l, b, p))
199}
200
201pub(crate) fn compute_hat_diagonal(x: &FdMatrix, l: &[f64]) -> Vec<f64> {
203 let (n, p) = x.shape();
204 let mut hat_diag = vec![0.0; n];
205 for i in 0..n {
206 let mut v = vec![0.0; p];
207 for j in 0..p {
208 v[j] = x[(i, j)];
209 for k in 0..j {
210 v[j] -= l[j * p + k] * v[k];
211 }
212 v[j] /= l[j * p + j];
213 }
214 hat_diag[i] = v.iter().map(|vi| vi * vi).sum();
215 }
216 hat_diag
217}
218
/// se_j = sqrt(sigma2 * [(X'X)^{-1}]_jj). Since (X'X)^{-1} = L^{-T} L^{-1},
/// the j-th diagonal entry equals ||L^{-1} e_j||^2, obtained by one forward
/// substitution per unit vector.
fn compute_ols_std_errors(l: &[f64], p: usize, sigma2: f64) -> Vec<f64> {
    (0..p)
        .map(|j| {
            let mut v = vec![0.0; p];
            v[j] = 1.0;
            for k in 0..p {
                let partial: f64 = (0..k).map(|kk| l[k * p + kk] * v[kk]).sum();
                v[k] = (v[k] - partial) / l[k * p + k];
            }
            let diag: f64 = v.iter().map(|vi| vi * vi).sum();
            (sigma2 * diag).sqrt()
        })
        .collect()
}
235
236fn validate_fregre_inputs(
243 n: usize,
244 m: usize,
245 y: &[f64],
246 scalar_covariates: Option<&FdMatrix>,
247) -> Option<()> {
248 if n < 3 || m == 0 || y.len() != n {
249 return None;
250 }
251 if let Some(sc) = scalar_covariates {
252 if sc.nrows() != n {
253 return None;
254 }
255 }
256 Some(())
257}
258
259fn resolve_ncomp(
261 ncomp: usize,
262 data: &FdMatrix,
263 y: &[f64],
264 scalar_covariates: Option<&FdMatrix>,
265 n: usize,
266 m: usize,
267) -> Option<usize> {
268 if ncomp == 0 {
269 let cv = fregre_cv(data, y, scalar_covariates, 1, m.min(n - 1).min(20), 5)?;
270 Some(cv.optimal_k)
271 } else {
272 Some(ncomp.min(n - 1).min(m))
273 }
274}
275
276pub(crate) fn build_design_matrix(
277 fpca_scores: &FdMatrix,
278 ncomp: usize,
279 scalar_covariates: Option<&FdMatrix>,
280 n: usize,
281) -> FdMatrix {
282 let p_scalar = scalar_covariates.map_or(0, |sc| sc.ncols());
283 let p_total = 1 + ncomp + p_scalar;
284 let mut design = FdMatrix::zeros(n, p_total);
285 for i in 0..n {
286 design[(i, 0)] = 1.0;
287 for k in 0..ncomp {
288 design[(i, 1 + k)] = fpca_scores[(i, k)];
289 }
290 if let Some(sc) = scalar_covariates {
291 for j in 0..p_scalar {
292 design[(i, 1 + ncomp + j)] = sc[(i, j)];
293 }
294 }
295 }
296 design
297}
298
299fn recover_beta_t(fpc_coeffs: &[f64], rotation: &FdMatrix, m: usize) -> Vec<f64> {
301 let ncomp = fpc_coeffs.len();
302 let mut beta_t = vec![0.0; m];
303 for k in 0..ncomp {
304 for j in 0..m {
305 beta_t[j] += fpc_coeffs[k] * rotation[(j, k)];
306 }
307 }
308 beta_t
309}
310
311fn compute_beta_se(gamma_se: &[f64], rotation: &FdMatrix, m: usize) -> Vec<f64> {
315 let ncomp = gamma_se.len();
316 let mut beta_se = vec![0.0; m];
317 for j in 0..m {
318 let mut var_j = 0.0;
319 for k in 0..ncomp {
320 var_j += rotation[(j, k)].powi(2) * gamma_se[k].powi(2);
321 }
322 beta_se[j] = var_j.sqrt();
323 }
324 beta_se
325}
326
327fn compute_fitted(design: &FdMatrix, coeffs: &[f64]) -> Vec<f64> {
329 let (n, p) = design.shape();
330 (0..n)
331 .map(|i| {
332 let mut yhat = 0.0;
333 for j in 0..p {
334 yhat += design[(i, j)] * coeffs[j];
335 }
336 yhat
337 })
338 .collect()
339}
340
/// Returns (R^2, adjusted R^2) from the response and residual vectors.
///
/// `p_total` is the total number of fitted coefficients including the
/// intercept, so the model degrees of freedom are `p_total - 1`. The
/// subtraction is saturating so a (mis)call with `p_total == 0` behaves like
/// an intercept-only model instead of underflowing `usize` (which would
/// panic in debug builds).
fn compute_r_squared(y: &[f64], residuals: &[f64], p_total: usize) -> (f64, f64) {
    let n = y.len();
    let y_mean = y.iter().sum::<f64>() / n as f64;
    let ss_tot: f64 = y.iter().map(|&yi| (yi - y_mean).powi(2)).sum();
    let ss_res: f64 = residuals.iter().map(|r| r * r).sum();
    let r_squared = if ss_tot > 0.0 {
        1.0 - ss_res / ss_tot
    } else {
        // Constant response: no variance to explain.
        0.0
    };
    let df_model = p_total.saturating_sub(1) as f64;
    // Only adjust while residual degrees of freedom remain positive.
    let r_squared_adj = if n as f64 - df_model - 1.0 > 0.0 {
        1.0 - (1.0 - r_squared) * (n as f64 - 1.0) / (n as f64 - df_model - 1.0)
    } else {
        r_squared
    };
    (r_squared, r_squared_adj)
}
360
361fn ols_solve(x: &FdMatrix, y: &[f64]) -> Option<(Vec<f64>, Vec<f64>)> {
368 let (n, p) = x.shape();
369 if n < p || p == 0 {
370 return None;
371 }
372 let xtx = compute_xtx(x);
373 let xty = compute_xty(x, y);
374 let l = cholesky_factor(&xtx, p)?;
375 let b = cholesky_forward_back(&l, &xty, p);
376 let hat_diag = compute_hat_diagonal(x, &l);
377 Some((b, hat_diag))
378}
379
/// Functional linear regression of a scalar response on functional data,
/// fitted by OLS on FPCA scores, optionally with extra scalar covariates.
///
/// `ncomp == 0` selects the number of components automatically (see
/// `resolve_ncomp`); otherwise the request is clamped to the data. Returns
/// `None` on invalid inputs or when the FPCA / OLS steps fail.
pub fn fregre_lm(
    data: &FdMatrix,
    y: &[f64],
    scalar_covariates: Option<&FdMatrix>,
    ncomp: usize,
) -> Option<FregreLmResult> {
    let (n, m) = data.shape();
    validate_fregre_inputs(n, m, y, scalar_covariates)?;

    let ncomp = resolve_ncomp(ncomp, data, y, scalar_covariates, n, m)?;

    // Reduce curves to FPCA scores, then regress on [1 | scores | scalars].
    let fpca = fdata_to_pc_1d(data, ncomp)?;
    let design = build_design_matrix(&fpca.scores, ncomp, scalar_covariates, n);
    let p_total = design.ncols();
    let (coeffs, hat_diag) = ols_solve(&design, y)?;

    let fitted_values = compute_fitted(&design, &coeffs);
    let residuals: Vec<f64> = y
        .iter()
        .zip(&fitted_values)
        .map(|(&yi, &yh)| yi - yh)
        .collect();
    let (r_squared, r_squared_adj) = compute_r_squared(y, &residuals, p_total);

    // Residual variance; df floored at 1 so sigma2 stays finite.
    let ss_res: f64 = residuals.iter().map(|r| r * r).sum();
    let df_resid = (n as f64 - p_total as f64).max(1.0);
    let residual_se = (ss_res / df_resid).sqrt();
    let sigma2 = ss_res / df_resid;

    // Standard errors from the Cholesky factor of X'X; identity fallback
    // when refactorization fails (ols_solve already succeeded above).
    let xtx = compute_xtx(&design);
    let l = cholesky_factor(&xtx, p_total).unwrap_or_else(|| vec![1.0; p_total * p_total]);
    let std_errors = compute_ols_std_errors(&l, p_total, sigma2);

    // GCV from leave-one-out residuals r_i / (1 - h_ii), leverage clamped
    // away from 1 to avoid division blow-up.
    let gcv = hat_diag
        .iter()
        .zip(&residuals)
        .map(|(&h, &r)| (r / (1.0 - h).max(1e-10)).powi(2))
        .sum::<f64>()
        / n as f64;

    // Map score-space coefficients/SEs back to the evaluation grid.
    let beta_t = recover_beta_t(&coeffs[1..1 + ncomp], &fpca.rotation, m);
    let beta_se = compute_beta_se(&std_errors[1..1 + ncomp], &fpca.rotation, m);
    let gamma: Vec<f64> = coeffs[1 + ncomp..].to_vec();

    Some(FregreLmResult {
        intercept: coeffs[0],
        beta_t,
        beta_se,
        gamma,
        fitted_values,
        residuals,
        r_squared,
        r_squared_adj,
        std_errors,
        ncomp,
        fpca,
        coefficients: coeffs,
        residual_se,
        gcv,
    })
}
459
460fn copy_rows(dst: &mut FdMatrix, src: &FdMatrix, src_rows: &[usize]) {
466 let ncols = src.ncols();
467 for (dst_i, &src_i) in src_rows.iter().enumerate() {
468 for j in 0..ncols {
469 dst[(dst_i, j)] = src[(src_i, j)];
470 }
471 }
472}
473
/// Mean squared cross-validation error of `fregre_lm` with `k` FPCA
/// components, averaged over all test observations across `n_folds` folds.
///
/// `folds` assigns a fold id to every row of `data`. Folds that are empty,
/// leave too few training rows (< k + 2), or whose fit fails are skipped;
/// returns `None` when no fold could be evaluated at all.
fn cv_error_for_k(
    data: &FdMatrix,
    y: &[f64],
    scalar_covariates: Option<&FdMatrix>,
    k: usize,
    n_folds: usize,
    folds: &[usize],
) -> Option<f64> {
    let n = data.nrows();
    let ncols = data.ncols();
    let mut total_error = 0.0;
    let mut count = 0;

    for fold in 0..n_folds {
        // Partition rows into train/test by fold id.
        let train_idx: Vec<usize> = (0..n).filter(|&i| folds[i] != fold).collect();
        let test_idx: Vec<usize> = (0..n).filter(|&i| folds[i] == fold).collect();
        let n_test = test_idx.len();
        // Need a non-empty test set and enough training rows for k components.
        if n_test == 0 || train_idx.len() < k + 2 {
            continue;
        }

        let mut train_data = FdMatrix::zeros(train_idx.len(), ncols);
        let mut test_data = FdMatrix::zeros(n_test, ncols);
        copy_rows(&mut train_data, data, &train_idx);
        copy_rows(&mut test_data, data, &test_idx);

        let train_y: Vec<f64> = train_idx.iter().map(|&i| y[i]).collect();
        let test_y: Vec<f64> = test_idx.iter().map(|&i| y[i]).collect();

        // Subset the optional scalar covariates along the same split.
        let train_sc = scalar_covariates.map(|sc| {
            let mut m = FdMatrix::zeros(train_idx.len(), sc.ncols());
            copy_rows(&mut m, sc, &train_idx);
            m
        });
        let test_sc = scalar_covariates.map(|sc| {
            let mut m = FdMatrix::zeros(n_test, sc.ncols());
            copy_rows(&mut m, sc, &test_idx);
            m
        });

        // A failed fit (e.g. singular design on this split) skips the fold.
        let fit = match fregre_lm(&train_data, &train_y, train_sc.as_ref(), k) {
            Some(f) => f,
            None => continue,
        };

        let predictions = predict_fregre_lm(&fit, &test_data, test_sc.as_ref());
        let fold_mse: f64 = predictions
            .iter()
            .zip(&test_y)
            .map(|(&yhat, &yi)| (yhat - yi).powi(2))
            .sum::<f64>()
            / n_test as f64;

        // Weight each fold by its size so the final value is a
        // per-observation mean over every evaluated test point.
        total_error += fold_mse * n_test as f64;
        count += n_test;
    }

    if count > 0 {
        Some(total_error / count as f64)
    } else {
        None
    }
}
538
539pub fn fregre_cv(
549 data: &FdMatrix,
550 y: &[f64],
551 scalar_covariates: Option<&FdMatrix>,
552 k_min: usize,
553 k_max: usize,
554 n_folds: usize,
555) -> Option<FregreCvResult> {
556 let n = data.nrows();
557 if n < n_folds || k_min < 1 || k_min > k_max {
558 return None;
559 }
560
561 let k_max = k_max.min(n - 2).min(data.ncols());
562
563 let folds = create_folds(n, n_folds, 42);
565
566 let mut k_values = Vec::new();
567 let mut cv_errors = Vec::new();
568
569 for k in k_min..=k_max {
570 if let Some(err) = cv_error_for_k(data, y, scalar_covariates, k, n_folds, &folds) {
571 k_values.push(k);
572 cv_errors.push(err);
573 }
574 }
575
576 if k_values.is_empty() {
577 return None;
578 }
579
580 let (optimal_idx, &min_cv_error) = cv_errors
581 .iter()
582 .enumerate()
583 .min_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
584 .unwrap();
585
586 Some(FregreCvResult {
587 k_values: k_values.clone(),
588 cv_errors,
589 optimal_k: k_values[optimal_idx],
590 min_cv_error,
591 })
592}
593
594pub fn predict_fregre_lm(
601 fit: &FregreLmResult,
602 new_data: &FdMatrix,
603 new_scalar: Option<&FdMatrix>,
604) -> Vec<f64> {
605 let (n_new, m) = new_data.shape();
606 let ncomp = fit.ncomp;
607 let p_scalar = fit.gamma.len();
608
609 let mut predictions = vec![0.0; n_new];
610 for i in 0..n_new {
611 let mut yhat = fit.intercept;
612 for k in 0..ncomp {
613 let mut s = 0.0;
614 for j in 0..m {
615 s += (new_data[(i, j)] - fit.fpca.mean[j]) * fit.fpca.rotation[(j, k)];
616 }
617 yhat += fit.coefficients[1 + k] * s;
618 }
619 if let Some(sc) = new_scalar {
620 for j in 0..p_scalar {
621 yhat += fit.gamma[j] * sc[(i, j)];
622 }
623 }
624 predictions[i] = yhat;
625 }
626 predictions
627}
628
/// Unnormalized Gaussian kernel exp(-d^2 / (2 h^2)); the normalizing
/// constant cancels in Nadaraya-Watson weight ratios, so it is omitted.
fn gaussian_kernel(d: f64, h: f64) -> f64 {
    let scale = 2.0 * h * h;
    (-d * d / scale).exp()
}
637
638fn compute_pairwise_distances(data: &FdMatrix, argvals: &[f64]) -> Vec<f64> {
640 let n = data.nrows();
641 let weights = crate::helpers::simpsons_weights(argvals);
642 let mut dists = vec![0.0; n * n];
643 for i in 0..n {
644 for j in (i + 1)..n {
645 let d = crate::helpers::l2_distance(&data.row(i), &data.row(j), &weights);
646 dists[i * n + j] = d;
647 dists[j * n + i] = d;
648 }
649 }
650 dists
651}
652
653fn compute_scalar_distances(sc: &FdMatrix) -> Vec<f64> {
655 let n = sc.nrows();
656 let p = sc.ncols();
657 let mut dists = vec![0.0; n * n];
658 for i in 0..n {
659 for j in (i + 1)..n {
660 let mut d2 = 0.0;
661 for k in 0..p {
662 let diff = sc[(i, k)] - sc[(j, k)];
663 d2 += diff * diff;
664 }
665 let d = d2.sqrt();
666 dists[i * n + j] = d;
667 dists[j * n + i] = d;
668 }
669 }
670 dists
671}
672
/// Leave-one-out Nadaraya-Watson prediction for observation `i`: a
/// Gaussian-kernel weighted mean of the other responses, with the functional
/// and (optionally) scalar kernels multiplied together.
fn nw_loo_predict(
    i: usize,
    n: usize,
    y: &[f64],
    func_dists: &[f64],
    scalar_dists: &[f64],
    h_func: f64,
    h_scalar: f64,
    has_scalar: bool,
) -> f64 {
    let kernel = |d: f64, h: f64| (-d * d / (2.0 * h * h)).exp();
    let mut num = 0.0;
    let mut den = 0.0;
    for j in (0..n).filter(|&j| j != i) {
        let mut w = kernel(func_dists[i * n + j], h_func);
        if has_scalar {
            w *= kernel(scalar_dists[i * n + j], h_scalar);
        }
        num += w * y[j];
        den += w;
    }
    // All weights ~0 (isolated point): fall back to the observed response.
    if den > 1e-15 {
        num / den
    } else {
        y[i]
    }
}
706
/// Mean squared leave-one-out error of a Nadaraya-Watson smoother with
/// bandwidth `h` on the precomputed distance matrix `dists` (n x n,
/// row-major).
fn loo_cv_error(dists: &[f64], y: &[f64], n: usize, h: f64) -> f64 {
    let kernel = |d: f64| (-d * d / (2.0 * h * h)).exp();
    let mut total = 0.0;
    for i in 0..n {
        let mut num = 0.0;
        let mut den = 0.0;
        for j in (0..n).filter(|&j| j != i) {
            let w = kernel(dists[i * n + j]);
            num += w * y[j];
            den += w;
        }
        // Degenerate weights: predict the point itself (zero error term).
        let yhat = if den > 1e-15 { num / den } else { y[i] };
        total += (y[i] - yhat).powi(2);
    }
    total / n as f64
}
727
728fn select_bandwidth_loo(dists: &[f64], y: &[f64], n: usize, _other_dists: Option<&[f64]>) -> f64 {
730 let mut nonzero_dists: Vec<f64> = (0..n)
731 .flat_map(|i| ((i + 1)..n).map(move |j| dists[i * n + j]))
732 .filter(|&d| d > 0.0)
733 .collect();
734 nonzero_dists.sort_by(|a, b| a.partial_cmp(b).unwrap());
735
736 if nonzero_dists.is_empty() {
737 return 1.0;
738 }
739
740 let n_cand = 20;
741 let mut best_h = nonzero_dists[nonzero_dists.len() / 2];
742 let mut best_cv = f64::INFINITY;
743
744 for qi in 1..=n_cand {
745 let q = qi as f64 / (n_cand + 1) as f64;
746 let idx = ((nonzero_dists.len() as f64 * q) as usize).min(nonzero_dists.len() - 1);
747 let h = nonzero_dists[idx].max(1e-10);
748 let cv = loo_cv_error(dists, y, n, h);
749 if cv < best_cv {
750 best_cv = cv;
751 best_h = h;
752 }
753 }
754 best_h
755}
756
/// Nonparametric functional regression (Nadaraya-Watson with Gaussian
/// kernels), optionally mixing in scalar covariates via a product kernel.
///
/// Fitted values are leave-one-out predictions, so `cv_error` is an honest
/// LOO mean squared error. Non-positive bandwidths are selected
/// automatically by leave-one-out CV over distance quantiles. Returns `None`
/// on shape mismatches or fewer than 3 observations.
pub fn fregre_np_mixed(
    data: &FdMatrix,
    y: &[f64],
    argvals: &[f64],
    scalar_covariates: Option<&FdMatrix>,
    h_func: f64,
    h_scalar: f64,
) -> Option<FregreNpResult> {
    let n = data.nrows();
    if n < 3 || data.ncols() == 0 || y.len() != n || argvals.len() != data.ncols() {
        return None;
    }

    let func_dists = compute_pairwise_distances(data, argvals);
    let has_scalar = scalar_covariates.is_some();
    let scalar_dists = scalar_covariates
        .map(compute_scalar_distances)
        .unwrap_or_default();

    // h <= 0 means "select automatically" via leave-one-out CV.
    let h_func = if h_func <= 0.0 {
        select_bandwidth_loo(&func_dists, y, n, None)
    } else {
        h_func
    };

    let h_scalar = if has_scalar && h_scalar <= 0.0 {
        select_bandwidth_loo(&scalar_dists, y, n, Some(&func_dists))
    } else {
        h_scalar
    };

    // Leave-one-out fits double as the CV error estimate.
    let mut fitted_values = vec![0.0; n];
    let mut cv_error = 0.0;
    for i in 0..n {
        fitted_values[i] = nw_loo_predict(
            i,
            n,
            y,
            &func_dists,
            &scalar_dists,
            h_func,
            h_scalar,
            has_scalar,
        );
        cv_error += (y[i] - fitted_values[i]).powi(2);
    }
    cv_error /= n as f64;

    let residuals: Vec<f64> = y
        .iter()
        .zip(&fitted_values)
        .map(|(&yi, &yh)| yi - yh)
        .collect();
    // p_total = 1 (intercept-only df); only the plain R^2 is reported.
    let (r_squared, _) = compute_r_squared(y, &residuals, 1);

    Some(FregreNpResult {
        fitted_values,
        residuals,
        r_squared,
        h_func,
        h_scalar,
        cv_error,
    })
}
837
/// Nadaraya-Watson prediction for new curves against the training sample,
/// using a product Gaussian kernel over the functional L2 distance and
/// (when both scalar matrices are provided) the Euclidean scalar distance.
///
/// Returns 0.0 for a new observation whose total kernel weight underflows,
/// i.e. no training curve is anywhere near it.
pub fn predict_fregre_np(
    train_data: &FdMatrix,
    y: &[f64],
    train_scalar: Option<&FdMatrix>,
    new_data: &FdMatrix,
    new_scalar: Option<&FdMatrix>,
    argvals: &[f64],
    h_func: f64,
    h_scalar: f64,
) -> Vec<f64> {
    let n_train = train_data.nrows();
    let n_new = new_data.nrows();
    let weights = crate::helpers::simpsons_weights(argvals);

    (0..n_new)
        .map(|i| {
            let new_row = new_data.row(i);
            let mut num = 0.0;
            let mut den = 0.0;
            for j in 0..n_train {
                let d_func = crate::helpers::l2_distance(&new_row, &train_data.row(j), &weights);
                let kf = gaussian_kernel(d_func, h_func);
                // Scalar kernel applies only when both sides carry covariates.
                let ks = match (new_scalar, train_scalar) {
                    (Some(ns), Some(ts)) => {
                        let d2: f64 = (0..ns.ncols())
                            .map(|k| (ns[(i, k)] - ts[(j, k)]).powi(2))
                            .sum();
                        gaussian_kernel(d2.sqrt(), h_scalar)
                    }
                    _ => 1.0,
                };
                let w = kf * ks;
                num += w * y[j];
                den += w;
            }
            if den > 1e-15 {
                num / den
            } else {
                0.0
            }
        })
        .collect()
}
882
883fn irls_step(design: &FdMatrix, y: &[f64], beta: &[f64]) -> Option<Vec<f64>> {
890 let (n, p) = design.shape();
891
892 let eta: Vec<f64> = (0..n)
894 .map(|i| (0..p).map(|j| design[(i, j)] * beta[j]).sum())
895 .collect();
896 let mu: Vec<f64> = eta.iter().map(|&e| sigmoid(e)).collect();
897 let w: Vec<f64> = mu.iter().map(|&p| (p * (1.0 - p)).max(1e-10)).collect();
898 let z_work: Vec<f64> = (0..n).map(|i| eta[i] + (y[i] - mu[i]) / w[i]).collect();
899
900 let mut xtwx = vec![0.0; p * p];
902 for k in 0..p {
903 for j in k..p {
904 let mut s = 0.0;
905 for i in 0..n {
906 s += design[(i, k)] * w[i] * design[(i, j)];
907 }
908 xtwx[k * p + j] = s;
909 xtwx[j * p + k] = s;
910 }
911 }
912
913 let mut xtwz = vec![0.0; p];
914 for k in 0..p {
915 let mut s = 0.0;
916 for i in 0..n {
917 s += design[(i, k)] * w[i] * z_work[i];
918 }
919 xtwz[k] = s;
920 }
921
922 cholesky_solve(&xtwx, &xtwz, p)
923}
924
/// Bernoulli log-likelihood of the fitted probabilities; each probability is
/// clamped away from 0 and 1 so the logarithms stay finite.
fn logistic_log_likelihood(probabilities: &[f64], y: &[f64]) -> f64 {
    let mut ll = 0.0;
    for (&p, &yi) in probabilities.iter().zip(y) {
        let p = p.clamp(1e-15, 1.0 - 1e-15);
        ll += yi * p.ln() + (1.0 - yi) * (1.0 - p).ln();
    }
    ll
}
936
/// Numerically stable logistic function 1 / (1 + e^{-x}): only a
/// non-positive argument is ever exponentiated, so the intermediate value
/// cannot overflow for large |x|.
pub(crate) fn sigmoid(x: f64) -> f64 {
    if x < 0.0 {
        let ex = x.exp();
        ex / (1.0 + ex)
    } else {
        1.0 / (1.0 + (-x).exp())
    }
}
946
947fn irls_loop(design: &FdMatrix, y: &[f64], max_iter: usize, tol: f64) -> (Vec<f64>, usize) {
949 let p_total = design.ncols();
950 let mut beta = vec![0.0; p_total];
951 let mut iterations = 0;
952 for iter in 0..max_iter {
953 iterations = iter + 1;
954 let beta_new = match irls_step(design, y, &beta) {
955 Some(b) => b,
956 None => break,
957 };
958 let change: f64 = beta_new
959 .iter()
960 .zip(&beta)
961 .map(|(a, b)| (a - b).abs())
962 .fold(0.0_f64, f64::max);
963 beta = beta_new;
964 if change < tol {
965 break;
966 }
967 }
968 (beta, iterations)
969}
970
/// Assembles a `FunctionalLogisticResult` from IRLS coefficients:
/// probabilities, 0/1 predictions, accuracy, Wald standard errors from the
/// observed information X'WX, and the functional coefficient mapped back to
/// the evaluation grid via the FPCA rotation.
fn build_logistic_result(
    design: &FdMatrix,
    beta: Vec<f64>,
    y: &[f64],
    fpca: FpcaResult,
    ncomp: usize,
    m: usize,
    iterations: usize,
) -> FunctionalLogisticResult {
    let (n, p) = design.shape();
    let eta = compute_fitted(design, &beta);
    let probabilities: Vec<f64> = eta.iter().map(|&e| sigmoid(e)).collect();
    // Classify at the conventional 0.5 threshold.
    let predicted_classes: Vec<u8> = probabilities
        .iter()
        .map(|&p| if p >= 0.5 { 1 } else { 0 })
        .collect();
    let correct: usize = predicted_classes
        .iter()
        .zip(y)
        .filter(|(&pred, &actual)| pred as f64 == actual)
        .count();
    let beta_t = recover_beta_t(&beta[1..1 + ncomp], &fpca.rotation, m);
    let gamma: Vec<f64> = beta[1 + ncomp..].to_vec();

    // Observed information X' diag(mu(1-mu)) X, with weights floored to keep
    // the matrix numerically positive definite.
    let w: Vec<f64> = probabilities
        .iter()
        .map(|&mu| (mu * (1.0 - mu)).max(1e-10))
        .collect();
    let mut xtwx = vec![0.0; p * p];
    for k in 0..p {
        for j in k..p {
            let mut s = 0.0;
            for i in 0..n {
                s += design[(i, k)] * w[i] * design[(i, j)];
            }
            xtwx[k * p + j] = s;
            xtwx[j * p + k] = s;
        }
    }
    // sigma2 = 1.0: the binomial variance is already carried by W.
    // NaN standard errors signal a non-invertible information matrix.
    let std_errors = cholesky_factor(&xtwx, p)
        .map(|l| compute_ols_std_errors(&l, p, 1.0))
        .unwrap_or_else(|| vec![f64::NAN; p]);
    let beta_se = compute_beta_se(&std_errors[1..1 + ncomp], &fpca.rotation, m);

    FunctionalLogisticResult {
        intercept: beta[0],
        beta_t,
        beta_se,
        gamma,
        accuracy: correct as f64 / n as f64,
        log_likelihood: logistic_log_likelihood(&probabilities, y),
        probabilities,
        predicted_classes,
        ncomp,
        std_errors,
        coefficients: beta,
        iterations,
        fpca,
    }
}
1033
1034pub fn functional_logistic(
1047 data: &FdMatrix,
1048 y: &[f64],
1049 scalar_covariates: Option<&FdMatrix>,
1050 ncomp: usize,
1051 max_iter: usize,
1052 tol: f64,
1053) -> Option<FunctionalLogisticResult> {
1054 let (n, m) = data.shape();
1055 if n < 3 || m == 0 || y.len() != n {
1056 return None;
1057 }
1058 if y.iter().any(|&yi| yi != 0.0 && yi != 1.0) {
1059 return None;
1060 }
1061
1062 let ncomp = ncomp.min(n - 1).min(m);
1063 let fpca = fdata_to_pc_1d(data, ncomp)?;
1064 let design = build_design_matrix(&fpca.scores, ncomp, scalar_covariates, n);
1065
1066 let max_iter = if max_iter == 0 { 25 } else { max_iter };
1067 let tol = if tol <= 0.0 { 1e-6 } else { tol };
1068
1069 let (beta, iterations) = irls_loop(&design, y, max_iter, tol);
1070 Some(build_logistic_result(
1071 &design, beta, y, fpca, ncomp, m, iterations,
1072 ))
1073}
1074
/// Pointwise and simultaneous bootstrap confidence bands for beta(t).
pub struct BootstrapCiResult {
    /// Pointwise lower band (alpha/2 bootstrap quantile per grid point).
    pub lower: Vec<f64>,
    /// Pointwise upper band (1 - alpha/2 bootstrap quantile per grid point).
    pub upper: Vec<f64>,
    /// beta(t) from the fit on the full data set.
    pub center: Vec<f64>,
    /// Simultaneous lower band (sup-t construction around `center`).
    pub sim_lower: Vec<f64>,
    /// Simultaneous upper band.
    pub sim_upper: Vec<f64>,
    /// Number of bootstrap replicates whose refit succeeded.
    pub n_boot_success: usize,
}
1094
1095fn subsample_rows(src: &FdMatrix, indices: &[usize]) -> FdMatrix {
1097 let ncols = src.ncols();
1098 let mut out = FdMatrix::zeros(indices.len(), ncols);
1099 for (dst_i, &src_i) in indices.iter().enumerate() {
1100 for j in 0..ncols {
1101 out[(dst_i, j)] = src[(src_i, j)];
1102 }
1103 }
1104 out
1105}
1106
/// Linearly interpolated quantile of an ascending-sorted slice
/// (R type-7 convention). An empty input yields NaN.
fn quantile(sorted: &[f64], q: f64) -> f64 {
    match sorted.len() {
        0 => f64::NAN,
        1 => sorted[0],
        len => {
            let pos = q * (len - 1) as f64;
            let lo = pos.floor() as usize;
            let hi = lo + 1;
            if hi >= len {
                sorted[len - 1]
            } else {
                let frac = pos - lo as f64;
                sorted[lo] * (1.0 - frac) + sorted[hi] * frac
            }
        }
    }
}
1125
/// Bootstrap confidence bands for the functional coefficient beta(t) of
/// `fregre_lm`.
///
/// Resamples rows with replacement `n_boot` times (each replicate seeded
/// deterministically from `seed`), refits the model on every resample, and
/// builds pointwise percentile bands plus sup-t simultaneous bands around
/// the full-data estimate. Returns `None` on invalid inputs or when fewer
/// than 3 bootstrap refits succeed.
pub fn bootstrap_ci_fregre_lm(
    data: &FdMatrix,
    y: &[f64],
    scalar_covariates: Option<&FdMatrix>,
    ncomp: usize,
    n_boot: usize,
    alpha: f64,
    seed: u64,
) -> Option<BootstrapCiResult> {
    let (n, m) = data.shape();
    if n < 3 || m == 0 || y.len() != n || n_boot == 0 || alpha <= 0.0 || alpha >= 1.0 {
        return None;
    }

    let original = fregre_lm(data, y, scalar_covariates, ncomp)?;
    let center = original.beta_t.clone();

    // Each replicate owns its RNG (seed + replicate index), so results are
    // identical whether the loop runs serially or in parallel under rayon.
    let boot_betas: Vec<Vec<f64>> = iter_maybe_parallel!(0..n_boot)
        .filter_map(|b| {
            let mut rng = StdRng::seed_from_u64(seed.wrapping_add(b as u64));
            let indices: Vec<usize> = (0..n).map(|_| rng.gen_range(0..n)).collect();

            let boot_data = subsample_rows(data, &indices);
            let boot_y: Vec<f64> = indices.iter().map(|&i| y[i]).collect();
            let boot_sc = scalar_covariates.map(|sc| subsample_rows(sc, &indices));

            // Failed refits (e.g. degenerate resamples) are simply dropped.
            fregre_lm(&boot_data, &boot_y, boot_sc.as_ref(), ncomp).map(|fit| fit.beta_t)
        })
        .collect();

    let n_boot_success = boot_betas.len();
    if n_boot_success < 3 {
        return None;
    }

    // Pointwise percentile bands plus the bootstrap SE at each grid point.
    let lo_q = alpha / 2.0;
    let hi_q = 1.0 - alpha / 2.0;
    let mut lower = vec![0.0; m];
    let mut upper = vec![0.0; m];
    let mut boot_se = vec![0.0; m];

    for j in 0..m {
        let mut vals: Vec<f64> = boot_betas.iter().map(|b| b[j]).collect();
        vals.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
        lower[j] = quantile(&vals, lo_q);
        upper[j] = quantile(&vals, hi_q);

        let mean_j: f64 = vals.iter().sum::<f64>() / vals.len() as f64;
        let var_j: f64 =
            vals.iter().map(|&v| (v - mean_j).powi(2)).sum::<f64>() / (vals.len() - 1) as f64;
        // Floor the SE so the sup-t statistics below never divide by zero.
        boot_se[j] = var_j.sqrt().max(1e-15);
    }

    // Sup-t simultaneous band: calibrate on the bootstrap distribution of
    // the maximum standardized deviation over the grid.
    let mut t_stats: Vec<f64> = boot_betas
        .iter()
        .map(|b| {
            (0..m)
                .map(|j| ((b[j] - center[j]) / boot_se[j]).abs())
                .fold(0.0_f64, f64::max)
        })
        .collect();
    t_stats.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));

    let c_alpha = quantile(&t_stats, 1.0 - alpha);
    let sim_lower: Vec<f64> = (0..m).map(|j| center[j] - c_alpha * boot_se[j]).collect();
    let sim_upper: Vec<f64> = (0..m).map(|j| center[j] + c_alpha * boot_se[j]).collect();

    Some(BootstrapCiResult {
        lower,
        upper,
        center,
        sim_lower,
        sim_upper,
        n_boot_success,
    })
}
1221
/// Bootstrap confidence bands for the functional coefficient beta(t) of
/// `functional_logistic`; same construction as `bootstrap_ci_fregre_lm`
/// (pointwise percentile bands and sup-t simultaneous bands), with the
/// IRLS controls `max_iter` / `tol` forwarded to each refit.
///
/// Returns `None` on invalid inputs or when fewer than 3 refits succeed.
pub fn bootstrap_ci_functional_logistic(
    data: &FdMatrix,
    y: &[f64],
    scalar_covariates: Option<&FdMatrix>,
    ncomp: usize,
    n_boot: usize,
    alpha: f64,
    seed: u64,
    max_iter: usize,
    tol: f64,
) -> Option<BootstrapCiResult> {
    let (n, m) = data.shape();
    if n < 3 || m == 0 || y.len() != n || n_boot == 0 || alpha <= 0.0 || alpha >= 1.0 {
        return None;
    }

    let original = functional_logistic(data, y, scalar_covariates, ncomp, max_iter, tol)?;
    let center = original.beta_t.clone();

    // Per-replicate RNG keeps results identical in serial and parallel runs.
    let boot_betas: Vec<Vec<f64>> = iter_maybe_parallel!(0..n_boot)
        .filter_map(|b| {
            let mut rng = StdRng::seed_from_u64(seed.wrapping_add(b as u64));
            let indices: Vec<usize> = (0..n).map(|_| rng.gen_range(0..n)).collect();

            let boot_data = subsample_rows(data, &indices);
            let boot_y: Vec<f64> = indices.iter().map(|&i| y[i]).collect();
            let boot_sc = scalar_covariates.map(|sc| subsample_rows(sc, &indices));

            // Failed refits (e.g. one-class resamples) are simply dropped.
            functional_logistic(&boot_data, &boot_y, boot_sc.as_ref(), ncomp, max_iter, tol)
                .map(|fit| fit.beta_t)
        })
        .collect();

    let n_boot_success = boot_betas.len();
    if n_boot_success < 3 {
        return None;
    }

    // Pointwise percentile bands plus bootstrap SE per grid point.
    let lo_q = alpha / 2.0;
    let hi_q = 1.0 - alpha / 2.0;
    let mut lower = vec![0.0; m];
    let mut upper = vec![0.0; m];
    let mut boot_se = vec![0.0; m];

    for j in 0..m {
        let mut vals: Vec<f64> = boot_betas.iter().map(|b| b[j]).collect();
        vals.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
        lower[j] = quantile(&vals, lo_q);
        upper[j] = quantile(&vals, hi_q);

        let mean_j: f64 = vals.iter().sum::<f64>() / vals.len() as f64;
        let var_j: f64 =
            vals.iter().map(|&v| (v - mean_j).powi(2)).sum::<f64>() / (vals.len() - 1) as f64;
        // Floor the SE so the sup-t statistics below never divide by zero.
        boot_se[j] = var_j.sqrt().max(1e-15);
    }

    // Sup-t simultaneous band calibrated on the bootstrap distribution of
    // the maximum standardized deviation over the grid.
    let mut t_stats: Vec<f64> = boot_betas
        .iter()
        .map(|b| {
            (0..m)
                .map(|j| ((b[j] - center[j]) / boot_se[j]).abs())
                .fold(0.0_f64, f64::max)
        })
        .collect();
    t_stats.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));

    let c_alpha = quantile(&t_stats, 1.0 - alpha);
    let sim_lower: Vec<f64> = (0..m).map(|j| center[j] - c_alpha * boot_se[j]).collect();
    let sim_upper: Vec<f64> = (0..m).map(|j| center[j] + c_alpha * boot_se[j]).collect();

    Some(BootstrapCiResult {
        lower,
        upper,
        center,
        sim_lower,
        sim_upper,
        n_boot_success,
    })
}
1318
/// Cross-validation summary over roughness penalties (`fregre_basis_cv`).
pub struct FregreBasisCvResult {
    /// Penalty weight with the smallest CV error.
    pub optimal_lambda: f64,
    /// Mean CV error per candidate lambda.
    pub cv_errors: Vec<f64>,
    /// Standard error of the fold errors per candidate lambda.
    pub cv_se: Vec<f64>,
    /// The candidate lambda grid that was evaluated.
    pub lambda_values: Vec<f64>,
    /// CV error at `optimal_lambda`.
    pub min_cv_error: f64,
}
1336
/// Cross-validates the roughness penalty `lambda` for a basis-expansion
/// scalar-on-function regression.
///
/// For each fold and each candidate lambda, both train and test curves are
/// smoothed onto the penalized basis, a ridge-penalized linear model is fit
/// on the training basis coefficients, and the test MSE is recorded. When
/// `lambda_range` is `None`, a 20-point log-spaced grid over [1e-4, 1e4] is
/// used. Returns `None` on invalid inputs or an empty candidate grid.
pub fn fregre_basis_cv(
    data: &FdMatrix,
    y: &[f64],
    argvals: &[f64],
    n_folds: usize,
    lambda_range: Option<&[f64]>,
    nbasis: usize,
    basis_type: &crate::smooth_basis::BasisType,
) -> Option<FregreBasisCvResult> {
    use crate::smooth_basis::{smooth_basis, BasisType, FdPar};

    let (n, m) = data.shape();
    if n < n_folds || m == 0 || y.len() != n || argvals.len() != m || nbasis < 2 {
        return None;
    }

    // Default grid: 20 log-spaced lambdas from 1e-4 to 1e4. The vector is
    // only materialized when no explicit range was supplied.
    let default_lambdas: Vec<f64> = if lambda_range.is_none() {
        (0..20)
            .map(|i| {
                let log_lam = -4.0 + 8.0 * i as f64 / 19.0;
                10.0_f64.powf(log_lam)
            })
            .collect()
    } else {
        Vec::new()
    };
    let lambdas = lambda_range.unwrap_or(&default_lambdas);

    if lambdas.is_empty() {
        return None;
    }

    // Second-derivative roughness penalty matrix for the chosen basis.
    let penalty = match basis_type {
        BasisType::Bspline { order } => {
            crate::smooth_basis::bspline_penalty_matrix(argvals, nbasis, *order, 2)
        }
        BasisType::Fourier { period } => {
            crate::smooth_basis::fourier_penalty_matrix(nbasis, *period, 2)
        }
    };

    // Fixed seed: fold assignment is deterministic across calls.
    let folds = crate::cv::create_folds(n, n_folds, 42);

    let mut cv_errors = vec![0.0; lambdas.len()];
    let mut cv_fold_errors: Vec<Vec<f64>> = vec![Vec::with_capacity(n_folds); lambdas.len()];

    for fold in 0..n_folds {
        let (train_idx, test_idx) = crate::cv::fold_indices(&folds, fold);
        if train_idx.is_empty() || test_idx.is_empty() {
            continue;
        }

        let train_data = crate::cv::subset_rows(data, &train_idx);
        let train_y = crate::cv::subset_vec(y, &train_idx);
        let test_data = crate::cv::subset_rows(data, &test_idx);
        let test_y = crate::cv::subset_vec(y, &test_idx);

        for (li, &lam) in lambdas.iter().enumerate() {
            let fdpar = FdPar {
                basis_type: basis_type.clone(),
                nbasis,
                lambda: lam,
                lfd_order: 2,
                penalty_matrix: penalty.clone(),
            };

            // A failed smoothing step scores this lambda as infinitely bad
            // for the fold rather than aborting the whole CV run.
            let train_result = match smooth_basis(&train_data, argvals, &fdpar) {
                Some(r) => r,
                None => {
                    cv_fold_errors[li].push(f64::INFINITY);
                    continue;
                }
            };

            let test_result = match smooth_basis(&test_data, argvals, &fdpar) {
                Some(r) => r,
                None => {
                    cv_fold_errors[li].push(f64::INFINITY);
                    continue;
                }
            };

            let train_coefs = &train_result.coefficients;
            let test_coefs = &test_result.coefficients;
            let n_train = train_idx.len();
            let n_test = test_idx.len();
            let k = train_coefs.ncols();

            // Ridge-penalized normal equations on the basis coefficients:
            // (C'C + lam * P) beta = C'y.
            let mut xtx = vec![0.0; k * k];
            let mut xty_vec = vec![0.0; k];
            for i in 0..n_train {
                for j in 0..k {
                    xty_vec[j] += train_coefs[(i, j)] * train_y[i];
                    for l in 0..k {
                        xtx[j * k + l] += train_coefs[(i, j)] * train_coefs[(i, l)];
                    }
                }
            }
            for j in 0..k {
                for l in 0..k {
                    xtx[j * k + l] += lam * penalty[j * k + l];
                }
            }

            let beta = {
                let mut a = xtx;
                let mut b = xty_vec;
                crate::smoothing::solve_gaussian_pub(&mut a, &mut b, k)
            };

            // Test MSE from the smoothed test-curve coefficients.
            let mut fold_mse = 0.0;
            for i in 0..n_test {
                let mut yhat = 0.0;
                for j in 0..k {
                    yhat += test_coefs[(i, j)] * beta[j];
                }
                fold_mse += (test_y[i] - yhat).powi(2);
            }
            fold_mse /= n_test as f64;
            cv_fold_errors[li].push(fold_mse);
        }
    }

    // Aggregate fold errors into a mean and a standard error per lambda.
    let mut cv_se = vec![0.0; lambdas.len()];
    for li in 0..lambdas.len() {
        let errs = &cv_fold_errors[li];
        if errs.is_empty() {
            cv_errors[li] = f64::INFINITY;
            continue;
        }
        let mean = errs.iter().sum::<f64>() / errs.len() as f64;
        cv_errors[li] = mean;
        if errs.len() > 1 {
            let var =
                errs.iter().map(|&e| (e - mean).powi(2)).sum::<f64>() / (errs.len() - 1) as f64;
            cv_se[li] = (var / errs.len() as f64).sqrt();
        }
    }

    // NaN-safe argmin over the candidate grid.
    let (best_idx, &min_cv_error) = cv_errors
        .iter()
        .enumerate()
        .min_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))?;

    Some(FregreBasisCvResult {
        optimal_lambda: lambdas[best_idx],
        cv_errors,
        cv_se,
        lambda_values: lambdas.to_vec(),
        min_cv_error,
    })
}
1516
/// Result of cross-validated bandwidth selection for nonparametric
/// functional regression (produced by `fregre_np_cv`).
pub struct FregreNpCvResult {
    /// Bandwidth achieving the lowest mean CV error.
    pub optimal_h: f64,
    /// Mean cross-validation error for each candidate bandwidth.
    pub cv_errors: Vec<f64>,
    /// Standard error of the CV error for each candidate bandwidth
    /// (0.0 when only a single fold contributed).
    pub cv_se: Vec<f64>,
    /// The candidate bandwidths that were evaluated.
    pub h_values: Vec<f64>,
    /// CV error attained at `optimal_h`.
    pub min_cv_error: f64,
}
1534
1535pub fn fregre_np_cv(
1550 data: &FdMatrix,
1551 y: &[f64],
1552 argvals: &[f64],
1553 n_folds: usize,
1554 h_range: Option<&[f64]>,
1555 scalar_covariates: Option<&FdMatrix>,
1556) -> Option<FregreNpCvResult> {
1557 let (n, m) = data.shape();
1558 if n < n_folds || m == 0 || y.len() != n || argvals.len() != m || n < 3 {
1559 return None;
1560 }
1561
1562 let func_dists = compute_pairwise_distances(data, argvals);
1564 let has_scalar = scalar_covariates.is_some();
1565 let scalar_dists = scalar_covariates
1566 .map(compute_scalar_distances)
1567 .unwrap_or_default();
1568
1569 let default_h: Vec<f64> = if h_range.is_none() {
1571 let mut nonzero: Vec<f64> = Vec::new();
1572 for i in 0..n {
1573 for j in (i + 1)..n {
1574 let d = func_dists[i * n + j];
1575 if d > 0.0 {
1576 nonzero.push(d);
1577 }
1578 }
1579 }
1580 nonzero.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
1581 if nonzero.is_empty() {
1582 return None;
1583 }
1584 (1..=20)
1585 .map(|qi| {
1586 let q = 0.05 + 0.90 * (qi - 1) as f64 / 19.0;
1587 let idx = ((nonzero.len() as f64 * q) as usize).min(nonzero.len() - 1);
1588 nonzero[idx].max(1e-10)
1589 })
1590 .collect()
1591 } else {
1592 Vec::new()
1593 };
1594 let h_values = h_range.unwrap_or(&default_h);
1595
1596 if h_values.is_empty() {
1597 return None;
1598 }
1599
1600 let folds = crate::cv::create_folds(n, n_folds, 42);
1601
1602 let mut cv_errors = vec![0.0; h_values.len()];
1603 let mut cv_fold_errors: Vec<Vec<f64>> = vec![Vec::with_capacity(n_folds); h_values.len()];
1604
1605 let h_scalar = if has_scalar {
1607 select_bandwidth_loo(&scalar_dists, y, n, Some(&func_dists))
1608 } else {
1609 1.0
1610 };
1611
1612 for fold in 0..n_folds {
1613 let (train_idx, test_idx) = crate::cv::fold_indices(&folds, fold);
1614 if train_idx.is_empty() || test_idx.is_empty() {
1615 continue;
1616 }
1617
1618 let train_y: Vec<f64> = train_idx.iter().map(|&i| y[i]).collect();
1619
1620 for (hi, &h) in h_values.iter().enumerate() {
1621 let mut fold_mse = 0.0;
1622 for &ti in &test_idx {
1623 let mut num = 0.0;
1625 let mut den = 0.0;
1626 for (local_j, &tj) in train_idx.iter().enumerate() {
1627 let kf = gaussian_kernel(func_dists[ti * n + tj], h);
1628 let ks = if has_scalar {
1629 gaussian_kernel(scalar_dists[ti * n + tj], h_scalar)
1630 } else {
1631 1.0
1632 };
1633 let w = kf * ks;
1634 num += w * train_y[local_j];
1635 den += w;
1636 }
1637 let y_hat = if den > 1e-15 { num / den } else { y[ti] };
1638 fold_mse += (y[ti] - y_hat).powi(2);
1639 }
1640 fold_mse /= test_idx.len() as f64;
1641 cv_fold_errors[hi].push(fold_mse);
1642 }
1643 }
1644
1645 let mut cv_se = vec![0.0; h_values.len()];
1647 for hi in 0..h_values.len() {
1648 let errs = &cv_fold_errors[hi];
1649 if errs.is_empty() {
1650 cv_errors[hi] = f64::INFINITY;
1651 continue;
1652 }
1653 let mean = errs.iter().sum::<f64>() / errs.len() as f64;
1654 cv_errors[hi] = mean;
1655 if errs.len() > 1 {
1656 let var =
1657 errs.iter().map(|&e| (e - mean).powi(2)).sum::<f64>() / (errs.len() - 1) as f64;
1658 cv_se[hi] = (var / errs.len() as f64).sqrt();
1659 }
1660 }
1661
1662 let (best_idx, &min_cv_error) = cv_errors
1663 .iter()
1664 .enumerate()
1665 .min_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))?;
1666
1667 Some(FregreNpCvResult {
1668 optimal_h: h_values[best_idx],
1669 cv_errors,
1670 cv_se,
1671 h_values: h_values.to_vec(),
1672 min_cv_error,
1673 })
1674}
1675
#[cfg(test)]
mod tests {
    use super::*;
    use std::f64::consts::PI;

    /// Builds a deterministic `(data, y, t)` triple: `n` sinusoidal curves
    /// sampled at `m` points on [0, 1], with a scalar response that is a
    /// linear function of each curve's phase and amplitude plus a small
    /// seed-dependent perturbation. Fully reproducible for a given seed.
    fn generate_test_data(n: usize, m: usize, seed: u64) -> (FdMatrix, Vec<f64>, Vec<f64>) {
        let t: Vec<f64> = (0..m).map(|j| j as f64 / (m - 1) as f64).collect();
        let mut data = FdMatrix::zeros(n, m);
        let mut y = vec![0.0; n];

        for i in 0..n {
            let phase =
                (seed.wrapping_mul(17).wrapping_add(i as u64 * 31) % 1000) as f64 / 1000.0 * PI;
            let amplitude =
                ((seed.wrapping_mul(13).wrapping_add(i as u64 * 7) % 100) as f64 / 100.0) - 0.5;
            for j in 0..m {
                data[(i, j)] =
                    (2.0 * PI * t[j] + phase).sin() + amplitude * (4.0 * PI * t[j]).cos();
            }
            y[i] = 2.0 * phase + 3.0 * amplitude + 0.05 * (seed.wrapping_add(i as u64) % 10) as f64;
        }
        (data, y, t)
    }

    /// Thresholds a continuous response at its median, producing the 0/1
    /// labels used by the logistic-regression tests. Extracted because the
    /// same boilerplate was previously repeated in six tests.
    fn binarize_at_median(y_cont: &[f64]) -> Vec<f64> {
        let mut sorted = y_cont.to_vec();
        sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
        let y_median = sorted[sorted.len() / 2];
        y_cont
            .iter()
            .map(|&v| if v >= y_median { 1.0 } else { 0.0 })
            .collect()
    }

    #[test]
    fn test_fregre_lm_basic() {
        let (data, y, _t) = generate_test_data(30, 50, 42);
        let result = fregre_lm(&data, &y, None, 3);
        assert!(result.is_some());
        let fit = result.unwrap();
        assert_eq!(fit.fitted_values.len(), 30);
        assert_eq!(fit.residuals.len(), 30);
        assert_eq!(fit.beta_t.len(), 50);
        assert_eq!(fit.ncomp, 3);
        assert!(fit.r_squared >= 0.0 && fit.r_squared <= 1.0 + 1e-10);
    }

    #[test]
    fn test_fregre_lm_with_scalar_covariates() {
        let (data, y, _t) = generate_test_data(30, 50, 42);
        let mut sc = FdMatrix::zeros(30, 2);
        for i in 0..30 {
            sc[(i, 0)] = i as f64 / 30.0;
            sc[(i, 1)] = (i as f64 * 0.7).sin();
        }
        let result = fregre_lm(&data, &y, Some(&sc), 3);
        assert!(result.is_some());
        let fit = result.unwrap();
        assert_eq!(fit.gamma.len(), 2);
    }

    #[test]
    fn test_fregre_lm_residuals_sum_near_zero() {
        let (data, y, _t) = generate_test_data(30, 50, 42);
        let fit = fregre_lm(&data, &y, None, 3).unwrap();
        let resid_sum: f64 = fit.residuals.iter().sum::<f64>();
        assert!(
            resid_sum.abs() < 1e-8,
            "Residuals should sum to ~0 with intercept, got {}",
            resid_sum
        );
    }

    #[test]
    fn test_fregre_lm_fitted_plus_residuals_equals_y() {
        let (data, y, _t) = generate_test_data(30, 50, 42);
        let fit = fregre_lm(&data, &y, None, 3).unwrap();
        for i in 0..30 {
            let reconstructed = fit.fitted_values[i] + fit.residuals[i];
            assert!(
                (reconstructed - y[i]).abs() < 1e-10,
                "ŷ + r should equal y at index {}",
                i
            );
        }
    }

    #[test]
    fn test_fregre_lm_more_components_higher_r2() {
        let (data, y, _t) = generate_test_data(50, 50, 42);
        let fit1 = fregre_lm(&data, &y, None, 1).unwrap();
        let fit3 = fregre_lm(&data, &y, None, 3).unwrap();
        assert!(
            fit3.r_squared >= fit1.r_squared - 1e-10,
            "More components should give >= R²: {} vs {}",
            fit3.r_squared,
            fit1.r_squared
        );
    }

    #[test]
    fn test_fregre_lm_invalid_input() {
        // Too few observations.
        let data = FdMatrix::zeros(2, 50);
        let y = vec![1.0, 2.0];
        assert!(fregre_lm(&data, &y, None, 1).is_none());

        // Response length mismatch.
        let data = FdMatrix::zeros(10, 50);
        let y = vec![1.0; 5];
        assert!(fregre_lm(&data, &y, None, 2).is_none());
    }

    #[test]
    fn test_fregre_lm_std_errors_positive() {
        let (data, y, _t) = generate_test_data(30, 50, 42);
        let fit = fregre_lm(&data, &y, None, 3).unwrap();
        for (i, &se) in fit.std_errors.iter().enumerate() {
            assert!(
                se > 0.0 && se.is_finite(),
                "Std error {} should be positive finite, got {}",
                i,
                se
            );
        }
    }

    #[test]
    fn test_predict_fregre_lm_on_training_data() {
        let (data, y, _t) = generate_test_data(30, 50, 42);
        let fit = fregre_lm(&data, &y, None, 3).unwrap();
        let preds = predict_fregre_lm(&fit, &data, None);
        for i in 0..30 {
            assert!(
                (preds[i] - fit.fitted_values[i]).abs() < 1e-6,
                "Prediction on training data should match fitted values"
            );
        }
    }

    #[test]
    fn test_fregre_cv_returns_result() {
        let (data, y, _t) = generate_test_data(30, 50, 42);
        let result = fregre_cv(&data, &y, None, 1, 8, 5);
        assert!(result.is_some());
        let cv = result.unwrap();
        assert!(cv.optimal_k >= 1 && cv.optimal_k <= 8);
        assert!(cv.min_cv_error >= 0.0);
    }

    #[test]
    fn test_fregre_cv_with_scalar_covariates() {
        let (data, y, _t) = generate_test_data(30, 50, 42);
        let mut sc = FdMatrix::zeros(30, 1);
        for i in 0..30 {
            sc[(i, 0)] = i as f64;
        }
        let result = fregre_cv(&data, &y, Some(&sc), 1, 5, 3);
        assert!(result.is_some());
    }

    #[test]
    fn test_fregre_np_mixed_basic() {
        let (data, y, t) = generate_test_data(30, 50, 42);
        let result = fregre_np_mixed(&data, &y, &t, None, 0.0, 0.0);
        assert!(result.is_some());
        let fit = result.unwrap();
        assert_eq!(fit.fitted_values.len(), 30);
        assert!(fit.h_func > 0.0);
        assert!(fit.cv_error >= 0.0);
    }

    #[test]
    fn test_fregre_np_mixed_with_scalars() {
        let (data, y, t) = generate_test_data(30, 50, 42);
        let mut sc = FdMatrix::zeros(30, 1);
        for i in 0..30 {
            sc[(i, 0)] = i as f64 / 30.0;
        }
        let result = fregre_np_mixed(&data, &y, &t, Some(&sc), 0.0, 0.0);
        assert!(result.is_some());
        let fit = result.unwrap();
        assert!(fit.h_scalar > 0.0);
    }

    #[test]
    fn test_fregre_np_mixed_manual_bandwidth() {
        let (data, y, t) = generate_test_data(30, 50, 42);
        let result = fregre_np_mixed(&data, &y, &t, None, 0.5, 0.0);
        assert!(result.is_some());
        let fit = result.unwrap();
        assert!(
            (fit.h_func - 0.5).abs() < 1e-10,
            "Should use provided bandwidth"
        );
    }

    #[test]
    fn test_functional_logistic_basic() {
        let (data, y_cont, _t) = generate_test_data(30, 50, 42);
        let y_bin = binarize_at_median(&y_cont);

        let result = functional_logistic(&data, &y_bin, None, 3, 25, 1e-6);
        assert!(result.is_some());
        let fit = result.unwrap();
        assert_eq!(fit.probabilities.len(), 30);
        assert_eq!(fit.predicted_classes.len(), 30);
        assert!(fit.accuracy >= 0.0 && fit.accuracy <= 1.0);
        for &p in &fit.probabilities {
            assert!((0.0..=1.0).contains(&p), "Probability out of range: {}", p);
        }
    }

    #[test]
    fn test_functional_logistic_with_scalar_covariates() {
        let (data, y_cont, _t) = generate_test_data(30, 50, 42);
        let y_bin = binarize_at_median(&y_cont);

        let mut sc = FdMatrix::zeros(30, 1);
        for i in 0..30 {
            sc[(i, 0)] = i as f64 / 30.0;
        }
        let result = functional_logistic(&data, &y_bin, Some(&sc), 3, 25, 1e-6);
        assert!(result.is_some());
        let fit = result.unwrap();
        assert_eq!(fit.gamma.len(), 1);
    }

    #[test]
    fn test_functional_logistic_invalid_response() {
        // Non-binary response must be rejected.
        let (data, _, _) = generate_test_data(30, 50, 42);
        let y: Vec<f64> = (0..30).map(|i| i as f64).collect();
        assert!(functional_logistic(&data, &y, None, 3, 25, 1e-6).is_none());
    }

    #[test]
    fn test_functional_logistic_convergence() {
        let (data, y_cont, _t) = generate_test_data(40, 50, 42);
        let y_bin = binarize_at_median(&y_cont);

        let fit = functional_logistic(&data, &y_bin, None, 3, 100, 1e-8).unwrap();
        assert!(fit.iterations <= 100, "Should converge within max_iter");
    }

    #[test]
    fn test_fregre_lm_single_component() {
        let (data, y, _t) = generate_test_data(20, 50, 42);
        let result = fregre_lm(&data, &y, None, 1);
        assert!(result.is_some());
        let fit = result.unwrap();
        assert_eq!(fit.ncomp, 1);
    }

    #[test]
    fn test_fregre_lm_auto_k_selection() {
        // ncomp = 0 requests automatic component selection.
        let (data, y, _t) = generate_test_data(30, 50, 42);
        let result = fregre_lm(&data, &y, None, 0);
        assert!(result.is_some());
        let fit = result.unwrap();
        assert!(fit.ncomp >= 1);
    }

    #[test]
    fn test_predict_fregre_np_basic() {
        let (data, y, t) = generate_test_data(30, 50, 42);
        let preds = predict_fregre_np(&data, &y, None, &data, None, &t, 0.5, 0.5);
        assert_eq!(preds.len(), 30);
        for &p in &preds {
            assert!(p.is_finite(), "Prediction should be finite");
        }
    }

    #[test]
    fn test_sigmoid_properties() {
        assert!((sigmoid(0.0) - 0.5).abs() < 1e-10);
        assert!(sigmoid(10.0) > 0.999);
        assert!(sigmoid(-10.0) < 0.001);
        // Symmetry: sigmoid(x) + sigmoid(-x) == 1.
        assert!((sigmoid(3.0) + sigmoid(-3.0) - 1.0).abs() < 1e-10);
    }

    #[test]
    fn test_fregre_lm_beta_se() {
        let (data, y, _t) = generate_test_data(30, 50, 42);
        let fit = fregre_lm(&data, &y, None, 3).unwrap();
        assert_eq!(fit.beta_se.len(), 50, "beta_se should have length m");
        for (j, &se) in fit.beta_se.iter().enumerate() {
            assert!(
                se > 0.0 && se.is_finite(),
                "beta_se[{}] should be positive finite, got {}",
                j,
                se
            );
        }
    }

    #[test]
    fn test_fregre_lm_beta_se_coverage() {
        let (data, y, _t) = generate_test_data(50, 50, 99);
        let fit = fregre_lm(&data, &y, None, 3).unwrap();
        assert_eq!(fit.beta_se.len(), 50);
        for (j, &se) in fit.beta_se.iter().enumerate() {
            assert!(
                se > 0.0 && se.is_finite(),
                "beta_se[{}] should be positive finite, got {}",
                j,
                se
            );
        }
        // A ±2·SE band always has positive width when SE > 0.
        for j in 0..50 {
            let width = 4.0 * fit.beta_se[j];
            assert!(width > 0.0, "CI width should be positive at j={}", j);
        }
    }

    #[test]
    fn test_functional_logistic_beta_se() {
        let (data, y_cont, _t) = generate_test_data(30, 50, 42);
        let y_bin = binarize_at_median(&y_cont);

        let fit = functional_logistic(&data, &y_bin, None, 3, 25, 1e-6).unwrap();
        assert_eq!(fit.beta_se.len(), 50, "beta_se should have length m");
        assert_eq!(
            fit.std_errors.len(),
            1 + 3,
            "std_errors should have length 1 + ncomp"
        );
        for (j, &se) in fit.beta_se.iter().enumerate() {
            assert!(
                se > 0.0 && se.is_finite(),
                "beta_se[{}] should be positive finite, got {}",
                j,
                se
            );
        }
        for (j, &se) in fit.std_errors.iter().enumerate() {
            assert!(
                se > 0.0 && se.is_finite(),
                "std_errors[{}] should be positive finite, got {}",
                j,
                se
            );
        }
    }

    #[test]
    fn test_beta_se_zero_for_constant() {
        // Nearly constant curves: SEs may collapse to ~0 but must stay
        // finite and non-negative.
        let n = 30;
        let m = 20;
        let mut data = FdMatrix::zeros(n, m);
        let mut y = vec![0.0; n];
        for i in 0..n {
            for j in 0..m {
                data[(i, j)] = 1.0 + 0.001 * (i as f64 / n as f64);
            }
            y[i] = i as f64 / n as f64;
        }
        let fit = fregre_lm(&data, &y, None, 1).unwrap();
        assert_eq!(fit.beta_se.len(), m);
        for (j, &se) in fit.beta_se.iter().enumerate() {
            assert!(
                se.is_finite() && se >= 0.0,
                "beta_se[{}] should be finite non-negative, got {}",
                j,
                se
            );
        }
    }

    #[test]
    fn test_bootstrap_ci_fregre_lm_shape() {
        let (data, y, _t) = generate_test_data(30, 20, 42);
        let result = bootstrap_ci_fregre_lm(&data, &y, None, 2, 50, 0.05, 123);
        assert!(result.is_some(), "bootstrap_ci_fregre_lm should succeed");
        let ci = result.unwrap();
        assert_eq!(ci.lower.len(), 20);
        assert_eq!(ci.upper.len(), 20);
        assert_eq!(ci.center.len(), 20);
        assert_eq!(ci.sim_lower.len(), 20);
        assert_eq!(ci.sim_upper.len(), 20);
        assert!(ci.n_boot_success > 0);
    }

    #[test]
    fn test_bootstrap_ci_fregre_lm_ordering() {
        let (data, y, _t) = generate_test_data(30, 20, 42);
        let ci = bootstrap_ci_fregre_lm(&data, &y, None, 2, 100, 0.05, 42).unwrap();
        for j in 0..20 {
            assert!(
                ci.lower[j] <= ci.center[j] + 1e-10,
                "lower <= center at j={}: {} > {}",
                j,
                ci.lower[j],
                ci.center[j]
            );
            assert!(
                ci.center[j] <= ci.upper[j] + 1e-10,
                "center <= upper at j={}: {} > {}",
                j,
                ci.center[j],
                ci.upper[j]
            );
            assert!(
                ci.sim_lower[j] <= ci.center[j] + 1e-10,
                "sim_lower <= center at j={}: {} > {}",
                j,
                ci.sim_lower[j],
                ci.center[j]
            );
            assert!(
                ci.center[j] <= ci.sim_upper[j] + 1e-10,
                "center <= sim_upper at j={}: {} > {}",
                j,
                ci.center[j],
                ci.sim_upper[j]
            );
        }
        // Simultaneous bands must dominate pointwise bands on average.
        let pw_width: f64 = (0..20).map(|j| ci.upper[j] - ci.lower[j]).sum::<f64>() / 20.0;
        let sim_width: f64 = (0..20)
            .map(|j| ci.sim_upper[j] - ci.sim_lower[j])
            .sum::<f64>()
            / 20.0;
        assert!(
            sim_width >= pw_width - 1e-10,
            "Simultaneous band should be wider on average: sim={}, pw={}",
            sim_width,
            pw_width
        );
    }

    #[test]
    fn test_bootstrap_ci_fregre_lm_center_matches_fit() {
        let (data, y, _t) = generate_test_data(30, 20, 42);
        let fit = fregre_lm(&data, &y, None, 2).unwrap();
        let ci = bootstrap_ci_fregre_lm(&data, &y, None, 2, 50, 0.05, 42).unwrap();
        for j in 0..20 {
            assert!(
                (ci.center[j] - fit.beta_t[j]).abs() < 1e-12,
                "center should match original beta_t at j={}",
                j
            );
        }
    }

    #[test]
    fn test_bootstrap_ci_functional_logistic_shape() {
        let (data, y_cont, _t) = generate_test_data(40, 20, 42);
        let y_bin = binarize_at_median(&y_cont);

        let result =
            bootstrap_ci_functional_logistic(&data, &y_bin, None, 2, 50, 0.05, 123, 25, 1e-6);
        assert!(
            result.is_some(),
            "bootstrap_ci_functional_logistic should succeed"
        );
        let ci = result.unwrap();
        assert_eq!(ci.lower.len(), 20);
        assert_eq!(ci.upper.len(), 20);
        assert_eq!(ci.center.len(), 20);
        assert!(ci.n_boot_success > 0);
    }

    #[test]
    fn test_bootstrap_ci_logistic_ordering() {
        let (data, y_cont, _t) = generate_test_data(40, 20, 42);
        let y_bin = binarize_at_median(&y_cont);

        let ci = bootstrap_ci_functional_logistic(&data, &y_bin, None, 2, 100, 0.05, 42, 25, 1e-6)
            .unwrap();
        for j in 0..20 {
            assert!(
                ci.lower[j] <= ci.upper[j] + 1e-10,
                "lower <= upper at j={}",
                j
            );
        }
    }

    #[test]
    fn test_fregre_basis_cv_returns_result() {
        let (data, y, t) = generate_test_data(30, 20, 42);
        let result = fregre_basis_cv(
            &data,
            &y,
            &t,
            5,
            None,
            7,
            &crate::smooth_basis::BasisType::Bspline { order: 4 },
        );
        assert!(result.is_some(), "fregre_basis_cv should succeed");
        let res = result.unwrap();
        assert!(res.optimal_lambda > 0.0);
        // Default lambda grid has 20 candidates.
        assert_eq!(res.cv_errors.len(), 20);
        assert!(res.min_cv_error >= 0.0);
    }

    #[test]
    fn test_fregre_basis_cv_custom_lambdas() {
        let (data, y, t) = generate_test_data(25, 15, 42);
        let lambdas = vec![0.001, 0.01, 0.1, 1.0, 10.0];
        let result = fregre_basis_cv(
            &data,
            &y,
            &t,
            5,
            Some(&lambdas),
            7,
            &crate::smooth_basis::BasisType::Bspline { order: 4 },
        );
        assert!(result.is_some());
        let res = result.unwrap();
        assert_eq!(res.lambda_values.len(), 5);
        assert!(lambdas.contains(&res.optimal_lambda));
    }

    #[test]
    fn test_fregre_np_cv_returns_result() {
        let (data, y, t) = generate_test_data(25, 15, 42);
        let result = fregre_np_cv(&data, &y, &t, 5, None, None);
        assert!(result.is_some(), "fregre_np_cv should succeed");
        let res = result.unwrap();
        assert!(res.optimal_h > 0.0);
        // Default bandwidth grid has 20 quantile candidates.
        assert_eq!(res.cv_errors.len(), 20);
        assert!(res.min_cv_error >= 0.0);
    }

    #[test]
    fn test_fregre_np_cv_custom_h() {
        let (data, y, t) = generate_test_data(20, 10, 42);
        let h_vals = vec![0.1, 0.5, 1.0, 2.0];
        let result = fregre_np_cv(&data, &y, &t, 3, Some(&h_vals), None);
        assert!(result.is_some());
        let res = result.unwrap();
        assert_eq!(res.h_values.len(), 4);
        assert!(h_vals.contains(&res.optimal_h));
    }
}