1use crate::matrix::FdMatrix;
18use crate::regression::{fdata_to_pc_1d, FpcaResult};
19
/// Output of [`fregre_lm`]: a functional linear model fitted on FPCA scores.
pub struct FregreLmResult {
    /// Intercept of the linear model.
    pub intercept: f64,
    /// Functional coefficient β(t) evaluated on the original sampling grid.
    pub beta_t: Vec<f64>,
    /// Coefficients of the optional scalar covariates (empty if none).
    pub gamma: Vec<f64>,
    /// In-sample fitted values ŷ.
    pub fitted_values: Vec<f64>,
    /// Residuals y − ŷ.
    pub residuals: Vec<f64>,
    /// Coefficient of determination R².
    pub r_squared: f64,
    /// R² adjusted for model degrees of freedom.
    pub r_squared_adj: f64,
    /// Standard errors of the fitted coefficients (same order as `coefficients`).
    pub std_errors: Vec<f64>,
    /// Number of functional principal components used.
    pub ncomp: usize,
    /// FPCA decomposition (mean, rotation, scores) the fit is based on.
    pub fpca: FpcaResult,
    /// Raw coefficient vector: [intercept, FPC coefficients…, scalar coefficients…].
    pub coefficients: Vec<f64>,
    /// Residual standard error sqrt(SS_res / df_resid).
    pub residual_se: f64,
    /// Generalized cross-validation score from hat-matrix leverages.
    pub gcv: f64,
}
53
/// Output of [`fregre_np_mixed`]: Nadaraya–Watson functional regression.
pub struct FregreNpResult {
    /// Leave-one-out kernel predictions for the training responses.
    pub fitted_values: Vec<f64>,
    /// Residuals y − ŷ.
    pub residuals: Vec<f64>,
    /// Coefficient of determination R² of the fitted values.
    pub r_squared: f64,
    /// Bandwidth used for the functional (curve-distance) kernel.
    pub h_func: f64,
    /// Bandwidth used for the scalar-covariate kernel (passed through
    /// unchanged when no scalar covariates are supplied).
    pub h_scalar: f64,
    /// Mean squared leave-one-out prediction error.
    pub cv_error: f64,
}
69
/// Output of [`functional_logistic`]: logistic regression on FPCA scores.
pub struct FunctionalLogisticResult {
    /// Intercept of the linear predictor.
    pub intercept: f64,
    /// Functional coefficient β(t) evaluated on the original sampling grid.
    pub beta_t: Vec<f64>,
    /// Coefficients of the optional scalar covariates (empty if none).
    pub gamma: Vec<f64>,
    /// Fitted class-1 probabilities σ(η) per observation.
    pub probabilities: Vec<f64>,
    /// Hard 0/1 predictions at the 0.5 probability threshold.
    pub predicted_classes: Vec<u8>,
    /// Number of functional principal components used.
    pub ncomp: usize,
    /// In-sample classification accuracy.
    pub accuracy: f64,
    /// Raw coefficient vector: [intercept, FPC coefficients…, scalar coefficients…].
    pub coefficients: Vec<f64>,
    /// Bernoulli log-likelihood at the fitted probabilities.
    pub log_likelihood: f64,
    /// Number of IRLS iterations actually performed.
    pub iterations: usize,
    /// FPCA decomposition used to build the design matrix.
    pub fpca: FpcaResult,
}
95
/// Output of [`fregre_cv`]: cross-validation curve over candidate
/// numbers of principal components.
pub struct FregreCvResult {
    /// Candidate k values that could actually be evaluated.
    pub k_values: Vec<usize>,
    /// CV mean squared error for each entry of `k_values`.
    pub cv_errors: Vec<f64>,
    /// k attaining the minimum CV error.
    pub optimal_k: usize,
    /// The minimum CV error itself.
    pub min_cv_error: f64,
}
107
108fn compute_xtx(x: &FdMatrix) -> Vec<f64> {
114 let (n, p) = x.shape();
115 let mut xtx = vec![0.0; p * p];
116 for k in 0..p {
117 for j in k..p {
118 let mut s = 0.0;
119 for i in 0..n {
120 s += x[(i, k)] * x[(i, j)];
121 }
122 xtx[k * p + j] = s;
123 xtx[j * p + k] = s;
124 }
125 }
126 xtx
127}
128
129fn compute_xty(x: &FdMatrix, y: &[f64]) -> Vec<f64> {
131 let (n, p) = x.shape();
132 (0..p)
133 .map(|k| {
134 let mut s = 0.0;
135 for i in 0..n {
136 s += x[(i, k)] * y[i];
137 }
138 s
139 })
140 .collect()
141}
142
/// Cholesky factorization A = L·Lᵀ of a symmetric positive-definite
/// matrix `a` (p×p, row-major).
///
/// Returns the lower-triangular factor L, or `None` when a pivot falls
/// at or below 1e-12 (matrix not numerically positive definite).
fn cholesky_factor(a: &[f64], p: usize) -> Option<Vec<f64>> {
    let mut lower = vec![0.0; p * p];
    for col in 0..p {
        // Pivot: a_jj minus the squares of entries already placed in this row.
        let mut pivot = a[col * p + col];
        for k in 0..col {
            pivot -= lower[col * p + k] * lower[col * p + k];
        }
        if pivot <= 1e-12 {
            return None;
        }
        let diag = pivot.sqrt();
        lower[col * p + col] = diag;
        // Fill the remainder of this column below the diagonal.
        for row in (col + 1)..p {
            let mut acc = a[row * p + col];
            for k in 0..col {
                acc -= lower[row * p + k] * lower[col * p + k];
            }
            lower[row * p + col] = acc / diag;
        }
    }
    Some(lower)
}
165
/// Solves L·Lᵀ·z = b given the lower-triangular Cholesky factor `l`
/// (p×p, row-major): forward substitution with L, then back
/// substitution with Lᵀ.
fn cholesky_forward_back(l: &[f64], b: &[f64], p: usize) -> Vec<f64> {
    let mut sol = b.to_vec();
    // Forward pass: solve L·w = b in place.
    for row in 0..p {
        let partial: f64 = (0..row).map(|k| l[row * p + k] * sol[k]).sum();
        sol[row] = (sol[row] - partial) / l[row * p + row];
    }
    // Backward pass: solve Lᵀ·z = w in place.
    for row in (0..p).rev() {
        let partial: f64 = ((row + 1)..p).map(|k| l[k * p + row] * sol[k]).sum();
        sol[row] = (sol[row] - partial) / l[row * p + row];
    }
    sol
}
183
184fn cholesky_solve(a: &[f64], b: &[f64], p: usize) -> Option<Vec<f64>> {
186 let l = cholesky_factor(a, p)?;
187 Some(cholesky_forward_back(&l, b, p))
188}
189
190fn compute_hat_diagonal(x: &FdMatrix, l: &[f64]) -> Vec<f64> {
192 let (n, p) = x.shape();
193 let mut hat_diag = vec![0.0; n];
194 for i in 0..n {
195 let mut v = vec![0.0; p];
196 for j in 0..p {
197 v[j] = x[(i, j)];
198 for k in 0..j {
199 v[j] -= l[j * p + k] * v[k];
200 }
201 v[j] /= l[j * p + j];
202 }
203 hat_diag[i] = v.iter().map(|vi| vi * vi).sum();
204 }
205 hat_diag
206}
207
/// Standard errors of OLS coefficients: sqrt(σ² · diag((XᵀX)⁻¹)),
/// where `l` is the Cholesky factor of XᵀX.
///
/// For each column j this forward-solves L·v = e_j; then ‖v‖² equals
/// the j-th diagonal entry of (XᵀX)⁻¹.
fn compute_ols_std_errors(l: &[f64], p: usize, sigma2: f64) -> Vec<f64> {
    (0..p)
        .map(|j| {
            let mut v = vec![0.0; p];
            v[j] = 1.0;
            for row in 0..p {
                let partial: f64 = (0..row).map(|k| l[row * p + k] * v[k]).sum();
                v[row] = (v[row] - partial) / l[row * p + row];
            }
            let diag_inv: f64 = v.iter().map(|vi| vi * vi).sum();
            (sigma2 * diag_inv).sqrt()
        })
        .collect()
}
224
225fn build_design_matrix(
231 fpca_scores: &FdMatrix,
232 ncomp: usize,
233 scalar_covariates: Option<&FdMatrix>,
234 n: usize,
235) -> FdMatrix {
236 let p_scalar = scalar_covariates.map_or(0, |sc| sc.ncols());
237 let p_total = 1 + ncomp + p_scalar;
238 let mut design = FdMatrix::zeros(n, p_total);
239 for i in 0..n {
240 design[(i, 0)] = 1.0;
241 for k in 0..ncomp {
242 design[(i, 1 + k)] = fpca_scores[(i, k)];
243 }
244 if let Some(sc) = scalar_covariates {
245 for j in 0..p_scalar {
246 design[(i, 1 + ncomp + j)] = sc[(i, j)];
247 }
248 }
249 }
250 design
251}
252
253fn recover_beta_t(fpc_coeffs: &[f64], rotation: &FdMatrix, m: usize) -> Vec<f64> {
255 let ncomp = fpc_coeffs.len();
256 let mut beta_t = vec![0.0; m];
257 for k in 0..ncomp {
258 for j in 0..m {
259 beta_t[j] += fpc_coeffs[k] * rotation[(j, k)];
260 }
261 }
262 beta_t
263}
264
265fn compute_fitted(design: &FdMatrix, coeffs: &[f64]) -> Vec<f64> {
267 let (n, p) = design.shape();
268 (0..n)
269 .map(|i| {
270 let mut yhat = 0.0;
271 for j in 0..p {
272 yhat += design[(i, j)] * coeffs[j];
273 }
274 yhat
275 })
276 .collect()
277}
278
/// Computes (R², adjusted R²) from the response `y`, the model
/// residuals, and the total number of fitted parameters `p_total`
/// (including the intercept).
///
/// R² is 1 − SS_res/SS_tot, falling back to 0.0 when the response has
/// zero variance. Adjusted R² penalizes by the model degrees of
/// freedom (p_total − 1) and falls back to plain R² when the residual
/// degrees of freedom are exhausted.
fn compute_r_squared(y: &[f64], residuals: &[f64], p_total: usize) -> (f64, f64) {
    let n = y.len();
    let y_mean = y.iter().sum::<f64>() / n as f64;
    let ss_tot: f64 = y.iter().map(|&yi| (yi - y_mean).powi(2)).sum();
    let ss_res: f64 = residuals.iter().map(|r| r * r).sum();
    let r_squared = if ss_tot > 0.0 {
        1.0 - ss_res / ss_tot
    } else {
        0.0
    };
    // saturating_sub: `p_total - 1` would underflow the usize
    // subtraction (panic in debug, wrap in release) for p_total == 0.
    let df_model = p_total.saturating_sub(1) as f64;
    let r_squared_adj = if n as f64 - df_model - 1.0 > 0.0 {
        1.0 - (1.0 - r_squared) * (n as f64 - 1.0) / (n as f64 - df_model - 1.0)
    } else {
        r_squared
    };
    (r_squared, r_squared_adj)
}
298
299fn ols_solve(x: &FdMatrix, y: &[f64]) -> Option<(Vec<f64>, Vec<f64>)> {
306 let (n, p) = x.shape();
307 if n < p || p == 0 {
308 return None;
309 }
310 let xtx = compute_xtx(x);
311 let xty = compute_xty(x, y);
312 let l = cholesky_factor(&xtx, p)?;
313 let b = cholesky_forward_back(&l, &xty, p);
314 let hat_diag = compute_hat_diagonal(x, &l);
315 Some((b, hat_diag))
316}
317
/// Functional linear regression of a scalar response on functional
/// data, using truncated FPCA scores as the regression basis.
///
/// `data` is n×m (n curves sampled at m grid points), `y` the length-n
/// response; optional `scalar_covariates` (n rows) enter the model
/// linearly. `ncomp` is the number of principal components; pass 0 to
/// select it by 5-fold cross-validation.
///
/// Returns `None` for degenerate inputs (n < 3, empty grid, mismatched
/// lengths), when FPCA or CV selection fails, or when the normal
/// equations are singular.
pub fn fregre_lm(
    data: &FdMatrix,
    y: &[f64],
    scalar_covariates: Option<&FdMatrix>,
    ncomp: usize,
) -> Option<FregreLmResult> {
    let (n, m) = data.shape();
    if n < 3 || m == 0 || y.len() != n {
        return None;
    }
    if let Some(sc) = scalar_covariates {
        if sc.nrows() != n {
            return None;
        }
    }

    // ncomp == 0 requests automatic selection; otherwise cap so the
    // design stays over-determined (at most n−1 components, at most m).
    let ncomp = if ncomp == 0 {
        let cv = fregre_cv(data, y, scalar_covariates, 1, m.min(n - 1).min(20), 5)?;
        cv.optimal_k
    } else {
        ncomp.min(n - 1).min(m)
    };

    let fpca = fdata_to_pc_1d(data, ncomp)?;
    let design = build_design_matrix(&fpca.scores, ncomp, scalar_covariates, n);
    let p_total = design.ncols();
    let (coeffs, hat_diag) = ols_solve(&design, y)?;

    let fitted_values = compute_fitted(&design, &coeffs);
    let residuals: Vec<f64> = y
        .iter()
        .zip(&fitted_values)
        .map(|(&yi, &yh)| yi - yh)
        .collect();
    let (r_squared, r_squared_adj) = compute_r_squared(y, &residuals, p_total);

    let ss_res: f64 = residuals.iter().map(|r| r * r).sum();
    let df_resid = (n as f64 - p_total as f64).max(1.0);
    let residual_se = (ss_res / df_resid).sqrt();
    let sigma2 = ss_res / df_resid;

    // Refactor XᵀX for the standard errors (ols_solve does not expose
    // its factor). Should succeed since ols_solve just did; the
    // all-ones fallback merely keeps the std errors finite.
    let xtx = compute_xtx(&design);
    let l = cholesky_factor(&xtx, p_total).unwrap_or_else(|| vec![1.0; p_total * p_total]);
    let std_errors = compute_ols_std_errors(&l, p_total, sigma2);

    // GCV from leave-one-out style residuals r_i / (1 − h_ii), with the
    // denominator floored to avoid division by ~0 at leverage 1.
    let gcv = hat_diag
        .iter()
        .zip(&residuals)
        .map(|(&h, &r)| (r / (1.0 - h).max(1e-10)).powi(2))
        .sum::<f64>()
        / n as f64;

    // Coefficients 1..=ncomp live in FPC space: map them back to a
    // curve β(t); any trailing coefficients belong to the scalar
    // covariates.
    let beta_t = recover_beta_t(&coeffs[1..1 + ncomp], &fpca.rotation, m);
    let gamma: Vec<f64> = coeffs[1 + ncomp..].to_vec();

    Some(FregreLmResult {
        intercept: coeffs[0],
        beta_t,
        gamma,
        fitted_values,
        residuals,
        r_squared,
        r_squared_adj,
        std_errors,
        ncomp,
        fpca,
        coefficients: coeffs,
        residual_se,
        gcv,
    })
}
407
408fn copy_rows(dst: &mut FdMatrix, src: &FdMatrix, src_rows: &[usize]) {
414 let ncols = src.ncols();
415 for (dst_i, &src_i) in src_rows.iter().enumerate() {
416 for j in 0..ncols {
417 dst[(dst_i, j)] = src[(src_i, j)];
418 }
419 }
420}
421
422fn cv_split_fold(
424 data: &FdMatrix,
425 y: &[f64],
426 scalar_covariates: Option<&FdMatrix>,
427 test_start: usize,
428 test_end: usize,
429) -> (
430 FdMatrix,
431 Vec<f64>,
432 FdMatrix,
433 Vec<f64>,
434 Option<FdMatrix>,
435 Option<FdMatrix>,
436) {
437 let n = data.nrows();
438 let ncols = data.ncols();
439
440 let train_idx: Vec<usize> = (0..test_start).chain(test_end..n).collect();
441 let test_idx: Vec<usize> = (test_start..test_end).collect();
442
443 let mut train_data = FdMatrix::zeros(train_idx.len(), ncols);
444 let mut test_data = FdMatrix::zeros(test_idx.len(), ncols);
445 copy_rows(&mut train_data, data, &train_idx);
446 copy_rows(&mut test_data, data, &test_idx);
447
448 let train_y: Vec<f64> = train_idx.iter().map(|&i| y[i]).collect();
449 let test_y: Vec<f64> = test_idx.iter().map(|&i| y[i]).collect();
450
451 let train_sc = scalar_covariates.map(|sc| {
452 let mut m = FdMatrix::zeros(train_idx.len(), sc.ncols());
453 copy_rows(&mut m, sc, &train_idx);
454 m
455 });
456 let test_sc = scalar_covariates.map(|sc| {
457 let mut m = FdMatrix::zeros(test_idx.len(), sc.ncols());
458 copy_rows(&mut m, sc, &test_idx);
459 m
460 });
461
462 (train_data, train_y, test_data, test_y, train_sc, test_sc)
463}
464
465fn cv_error_for_k(
467 data: &FdMatrix,
468 y: &[f64],
469 scalar_covariates: Option<&FdMatrix>,
470 k: usize,
471 n_folds: usize,
472) -> Option<f64> {
473 let n = data.nrows();
474 let fold_size = n / n_folds;
475 let mut total_error = 0.0;
476 let mut count = 0;
477
478 for fold in 0..n_folds {
479 let test_start = fold * fold_size;
480 let test_end = if fold == n_folds - 1 {
481 n
482 } else {
483 test_start + fold_size
484 };
485 let n_test = test_end - test_start;
486 if n - n_test < k + 2 {
487 continue;
488 }
489
490 let (train_data, train_y, test_data, test_y, train_sc, test_sc) =
491 cv_split_fold(data, y, scalar_covariates, test_start, test_end);
492
493 let fit = match fregre_lm(&train_data, &train_y, train_sc.as_ref(), k) {
494 Some(f) => f,
495 None => continue,
496 };
497
498 let predictions = predict_fregre_lm(&fit, &test_data, test_sc.as_ref());
499 let fold_mse: f64 = predictions
500 .iter()
501 .zip(&test_y)
502 .map(|(&yhat, &yi)| (yhat - yi).powi(2))
503 .sum::<f64>()
504 / n_test as f64;
505
506 total_error += fold_mse * n_test as f64;
507 count += n_test;
508 }
509
510 if count > 0 {
511 Some(total_error / count as f64)
512 } else {
513 None
514 }
515}
516
517pub fn fregre_cv(
527 data: &FdMatrix,
528 y: &[f64],
529 scalar_covariates: Option<&FdMatrix>,
530 k_min: usize,
531 k_max: usize,
532 n_folds: usize,
533) -> Option<FregreCvResult> {
534 let n = data.nrows();
535 if n < n_folds || k_min < 1 || k_min > k_max {
536 return None;
537 }
538
539 let k_max = k_max.min(n - 2).min(data.ncols());
540
541 let mut k_values = Vec::new();
542 let mut cv_errors = Vec::new();
543
544 for k in k_min..=k_max {
545 if let Some(err) = cv_error_for_k(data, y, scalar_covariates, k, n_folds) {
546 k_values.push(k);
547 cv_errors.push(err);
548 }
549 }
550
551 if k_values.is_empty() {
552 return None;
553 }
554
555 let (optimal_idx, &min_cv_error) = cv_errors
556 .iter()
557 .enumerate()
558 .min_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
559 .unwrap();
560
561 Some(FregreCvResult {
562 k_values: k_values.clone(),
563 cv_errors,
564 optimal_k: k_values[optimal_idx],
565 min_cv_error,
566 })
567}
568
/// Predicts responses for new curves with a fitted [`FregreLmResult`].
///
/// Each new curve is centered with the training FPCA mean and
/// projected onto the stored rotation to obtain its scores; the
/// prediction is intercept + Σ coef·score, plus scalar-covariate terms
/// when `new_scalar` is supplied.
///
/// NOTE(review): assumes `new_data` is sampled on the same m-point
/// grid as the training data — not checked here.
pub fn predict_fregre_lm(
    fit: &FregreLmResult,
    new_data: &FdMatrix,
    new_scalar: Option<&FdMatrix>,
) -> Vec<f64> {
    let (n_new, m) = new_data.shape();
    let ncomp = fit.ncomp;
    let p_scalar = fit.gamma.len();

    let mut predictions = vec![0.0; n_new];
    for i in 0..n_new {
        let mut yhat = fit.intercept;
        for k in 0..ncomp {
            // FPC score: centered curve projected onto component k.
            let mut s = 0.0;
            for j in 0..m {
                s += (new_data[(i, j)] - fit.fpca.mean[j]) * fit.fpca.rotation[(j, k)];
            }
            yhat += fit.coefficients[1 + k] * s;
        }
        if let Some(sc) = new_scalar {
            for j in 0..p_scalar {
                yhat += fit.gamma[j] * sc[(i, j)];
            }
        }
        predictions[i] = yhat;
    }
    predictions
}
603
/// Gaussian kernel weight exp(−d² / (2h²)) for distance `d` and
/// bandwidth `h`.
fn gaussian_kernel(d: f64, h: f64) -> f64 {
    let two_h_sq = 2.0 * h * h;
    (-(d * d) / two_h_sq).exp()
}
612
613fn compute_pairwise_distances(data: &FdMatrix, argvals: &[f64]) -> Vec<f64> {
615 let n = data.nrows();
616 let weights = crate::helpers::simpsons_weights(argvals);
617 let mut dists = vec![0.0; n * n];
618 for i in 0..n {
619 for j in (i + 1)..n {
620 let d = crate::helpers::l2_distance(&data.row(i), &data.row(j), &weights);
621 dists[i * n + j] = d;
622 dists[j * n + i] = d;
623 }
624 }
625 dists
626}
627
628fn compute_scalar_distances(sc: &FdMatrix) -> Vec<f64> {
630 let n = sc.nrows();
631 let p = sc.ncols();
632 let mut dists = vec![0.0; n * n];
633 for i in 0..n {
634 for j in (i + 1)..n {
635 let mut d2 = 0.0;
636 for k in 0..p {
637 let diff = sc[(i, k)] - sc[(j, k)];
638 d2 += diff * diff;
639 }
640 let d = d2.sqrt();
641 dists[i * n + j] = d;
642 dists[j * n + i] = d;
643 }
644 }
645 dists
646}
647
648fn nw_loo_predict(
650 i: usize,
651 n: usize,
652 y: &[f64],
653 func_dists: &[f64],
654 scalar_dists: &[f64],
655 h_func: f64,
656 h_scalar: f64,
657 has_scalar: bool,
658) -> f64 {
659 let mut num = 0.0;
660 let mut den = 0.0;
661 for j in 0..n {
662 if i == j {
663 continue;
664 }
665 let kf = gaussian_kernel(func_dists[i * n + j], h_func);
666 let ks = if has_scalar {
667 gaussian_kernel(scalar_dists[i * n + j], h_scalar)
668 } else {
669 1.0
670 };
671 let w = kf * ks;
672 num += w * y[j];
673 den += w;
674 }
675 if den > 1e-15 {
676 num / den
677 } else {
678 y[i]
679 }
680}
681
682fn loo_cv_error(dists: &[f64], y: &[f64], n: usize, h: f64) -> f64 {
684 (0..n)
685 .map(|i| {
686 let mut num = 0.0;
687 let mut den = 0.0;
688 for j in 0..n {
689 if i == j {
690 continue;
691 }
692 let w = gaussian_kernel(dists[i * n + j], h);
693 num += w * y[j];
694 den += w;
695 }
696 let yhat = if den > 1e-15 { num / den } else { y[i] };
697 (y[i] - yhat).powi(2)
698 })
699 .sum::<f64>()
700 / n as f64
701}
702
703fn select_bandwidth_loo(dists: &[f64], y: &[f64], n: usize, _other_dists: Option<&[f64]>) -> f64 {
705 let mut nonzero_dists: Vec<f64> = (0..n)
706 .flat_map(|i| ((i + 1)..n).map(move |j| dists[i * n + j]))
707 .filter(|&d| d > 0.0)
708 .collect();
709 nonzero_dists.sort_by(|a, b| a.partial_cmp(b).unwrap());
710
711 if nonzero_dists.is_empty() {
712 return 1.0;
713 }
714
715 let n_cand = 20;
716 let mut best_h = nonzero_dists[nonzero_dists.len() / 2];
717 let mut best_cv = f64::INFINITY;
718
719 for qi in 1..=n_cand {
720 let q = qi as f64 / (n_cand + 1) as f64;
721 let idx = ((nonzero_dists.len() as f64 * q) as usize).min(nonzero_dists.len() - 1);
722 let h = nonzero_dists[idx].max(1e-10);
723 let cv = loo_cv_error(dists, y, n, h);
724 if cv < best_cv {
725 best_cv = cv;
726 best_h = h;
727 }
728 }
729 best_h
730}
731
/// Nonparametric functional regression (Nadaraya–Watson) with an
/// optional scalar-covariate kernel.
///
/// Curve distances are Simpson-weighted L² on `argvals`. Bandwidths
/// `h_func` / `h_scalar` are selected by leave-one-out CV when passed
/// as <= 0. Fitted values are themselves leave-one-out predictions, so
/// `cv_error` is their mean squared error.
///
/// Returns `None` for degenerate inputs (n < 3, empty grid,
/// mismatched `y` or `argvals` lengths).
pub fn fregre_np_mixed(
    data: &FdMatrix,
    y: &[f64],
    argvals: &[f64],
    scalar_covariates: Option<&FdMatrix>,
    h_func: f64,
    h_scalar: f64,
) -> Option<FregreNpResult> {
    let n = data.nrows();
    if n < 3 || data.ncols() == 0 || y.len() != n || argvals.len() != data.ncols() {
        return None;
    }

    let func_dists = compute_pairwise_distances(data, argvals);
    let has_scalar = scalar_covariates.is_some();
    let scalar_dists = scalar_covariates
        .map(compute_scalar_distances)
        .unwrap_or_default();

    // Non-positive bandwidths request data-driven LOO selection.
    let h_func = if h_func <= 0.0 {
        select_bandwidth_loo(&func_dists, y, n, None)
    } else {
        h_func
    };

    let h_scalar = if has_scalar && h_scalar <= 0.0 {
        select_bandwidth_loo(&scalar_dists, y, n, Some(&func_dists))
    } else {
        h_scalar
    };

    // Fitted values are leave-one-out predictions, so their squared
    // errors double as the CV error.
    let mut fitted_values = vec![0.0; n];
    let mut cv_error = 0.0;
    for i in 0..n {
        fitted_values[i] = nw_loo_predict(
            i,
            n,
            y,
            &func_dists,
            &scalar_dists,
            h_func,
            h_scalar,
            has_scalar,
        );
        cv_error += (y[i] - fitted_values[i]).powi(2);
    }
    cv_error /= n as f64;

    let residuals: Vec<f64> = y
        .iter()
        .zip(&fitted_values)
        .map(|(&yi, &yh)| yi - yh)
        .collect();
    // p_total = 1: only plain R² is meaningful for the kernel fit.
    let (r_squared, _) = compute_r_squared(y, &residuals, 1);

    Some(FregreNpResult {
        fitted_values,
        residuals,
        r_squared,
        h_func,
        h_scalar,
        cv_error,
    })
}
812
/// Predicts responses for new curves with a Nadaraya–Watson estimator
/// built directly from the training set (no refitting).
///
/// Weights are the product of a functional kernel (Simpson-weighted L²
/// distance, bandwidth `h_func`) and, when both scalar matrices are
/// supplied, a Euclidean scalar kernel (bandwidth `h_scalar`). A new
/// point with no effective neighbors (all weights ~0) predicts 0.0.
///
/// NOTE(review): assumes `new_data` shares the training grid — not
/// checked here.
pub fn predict_fregre_np(
    train_data: &FdMatrix,
    y: &[f64],
    train_scalar: Option<&FdMatrix>,
    new_data: &FdMatrix,
    new_scalar: Option<&FdMatrix>,
    argvals: &[f64],
    h_func: f64,
    h_scalar: f64,
) -> Vec<f64> {
    let n_train = train_data.nrows();
    let n_new = new_data.nrows();
    let weights = crate::helpers::simpsons_weights(argvals);

    (0..n_new)
        .map(|i| {
            let new_row = new_data.row(i);
            let mut num = 0.0;
            let mut den = 0.0;
            for j in 0..n_train {
                let d_func = crate::helpers::l2_distance(&new_row, &train_data.row(j), &weights);
                let kf = gaussian_kernel(d_func, h_func);
                // Scalar kernel applies only when both sides have covariates.
                let ks = match (new_scalar, train_scalar) {
                    (Some(ns), Some(ts)) => {
                        let d2: f64 = (0..ns.ncols())
                            .map(|k| (ns[(i, k)] - ts[(j, k)]).powi(2))
                            .sum();
                        gaussian_kernel(d2.sqrt(), h_scalar)
                    }
                    _ => 1.0,
                };
                let w = kf * ks;
                num += w * y[j];
                den += w;
            }
            if den > 1e-15 {
                num / den
            } else {
                0.0
            }
        })
        .collect()
}
857
/// One iteratively-reweighted-least-squares step for logistic
/// regression: computes η = Xβ, μ = σ(η), weights w = μ(1−μ), the
/// working response z = η + (y−μ)/w, then solves the weighted normal
/// equations XᵀWX·β′ = XᵀWz. Returns `None` when the weighted system
/// is numerically singular.
fn irls_step(design: &FdMatrix, y: &[f64], beta: &[f64]) -> Option<Vec<f64>> {
    let (n, p) = design.shape();

    let eta: Vec<f64> = (0..n)
        .map(|i| (0..p).map(|j| design[(i, j)] * beta[j]).sum())
        .collect();
    let mu: Vec<f64> = eta.iter().map(|&e| sigmoid(e)).collect();
    // Weights floored so the working response stays finite when a
    // probability saturates at 0 or 1.
    let w: Vec<f64> = mu.iter().map(|&p| (p * (1.0 - p)).max(1e-10)).collect();
    let z_work: Vec<f64> = (0..n).map(|i| eta[i] + (y[i] - mu[i]) / w[i]).collect();

    // XᵀWX (symmetric — fill both triangles from the upper one).
    let mut xtwx = vec![0.0; p * p];
    for k in 0..p {
        for j in k..p {
            let mut s = 0.0;
            for i in 0..n {
                s += design[(i, k)] * w[i] * design[(i, j)];
            }
            xtwx[k * p + j] = s;
            xtwx[j * p + k] = s;
        }
    }

    // XᵀWz.
    let mut xtwz = vec![0.0; p];
    for k in 0..p {
        let mut s = 0.0;
        for i in 0..n {
            s += design[(i, k)] * w[i] * z_work[i];
        }
        xtwz[k] = s;
    }

    cholesky_solve(&xtwx, &xtwz, p)
}
899
/// Bernoulli log-likelihood Σ yᵢ·ln(pᵢ) + (1−yᵢ)·ln(1−pᵢ), with
/// probabilities clamped away from 0 and 1 to keep the logs finite.
fn logistic_log_likelihood(probabilities: &[f64], y: &[f64]) -> f64 {
    let mut ll = 0.0;
    for (&p_raw, &yi) in probabilities.iter().zip(y) {
        let p = p_raw.clamp(1e-15, 1.0 - 1e-15);
        ll += yi * p.ln() + (1.0 - yi) * (1.0 - p).ln();
    }
    ll
}
911
/// Numerically stable logistic function 1/(1+e^{−x}); the two branches
/// avoid overflow in `exp` for large |x|.
fn sigmoid(x: f64) -> f64 {
    if x < 0.0 {
        let e = x.exp();
        e / (1.0 + e)
    } else {
        1.0 / (1.0 + (-x).exp())
    }
}
921
922fn irls_loop(design: &FdMatrix, y: &[f64], max_iter: usize, tol: f64) -> (Vec<f64>, usize) {
924 let p_total = design.ncols();
925 let mut beta = vec![0.0; p_total];
926 let mut iterations = 0;
927 for iter in 0..max_iter {
928 iterations = iter + 1;
929 let beta_new = match irls_step(design, y, &beta) {
930 Some(b) => b,
931 None => break,
932 };
933 let change: f64 = beta_new
934 .iter()
935 .zip(&beta)
936 .map(|(a, b)| (a - b).abs())
937 .fold(0.0_f64, f64::max);
938 beta = beta_new;
939 if change < tol {
940 break;
941 }
942 }
943 (beta, iterations)
944}
945
/// Assembles a [`FunctionalLogisticResult`] from converged IRLS
/// coefficients: probabilities via the sigmoid of the linear
/// predictor, 0/1 classes at the 0.5 threshold, training accuracy,
/// log-likelihood, and β(t) recovered through the FPC rotation.
fn build_logistic_result(
    design: &FdMatrix,
    beta: Vec<f64>,
    y: &[f64],
    fpca: FpcaResult,
    ncomp: usize,
    m: usize,
    iterations: usize,
) -> FunctionalLogisticResult {
    let n = y.len();
    // Linear predictor and fitted probabilities.
    let eta = compute_fitted(design, &beta);
    let probabilities: Vec<f64> = eta.iter().map(|&e| sigmoid(e)).collect();
    // Hard classification at the conventional 0.5 threshold.
    let predicted_classes: Vec<u8> = probabilities
        .iter()
        .map(|&p| if p >= 0.5 { 1 } else { 0 })
        .collect();
    let correct: usize = predicted_classes
        .iter()
        .zip(y)
        .filter(|(&pred, &actual)| pred as f64 == actual)
        .count();
    // Coefficients 1..=ncomp live in FPC space; the rest belong to the
    // scalar covariates.
    let beta_t = recover_beta_t(&beta[1..1 + ncomp], &fpca.rotation, m);
    let gamma: Vec<f64> = beta[1 + ncomp..].to_vec();

    FunctionalLogisticResult {
        intercept: beta[0],
        beta_t,
        gamma,
        accuracy: correct as f64 / n as f64,
        log_likelihood: logistic_log_likelihood(&probabilities, y),
        probabilities,
        predicted_classes,
        ncomp,
        coefficients: beta,
        iterations,
        fpca,
    }
}
985
/// Functional logistic regression of a binary response on FPCA scores,
/// fitted by IRLS.
///
/// The response must contain only the values 0.0 and 1.0.
/// `max_iter == 0` selects the default of 25 iterations; `tol <= 0`
/// selects the default 1e-6. Returns `None` for degenerate inputs, a
/// non-binary response, or when FPCA fails.
pub fn functional_logistic(
    data: &FdMatrix,
    y: &[f64],
    scalar_covariates: Option<&FdMatrix>,
    ncomp: usize,
    max_iter: usize,
    tol: f64,
) -> Option<FunctionalLogisticResult> {
    let (n, m) = data.shape();
    if n < 3 || m == 0 || y.len() != n {
        return None;
    }
    // Response must be strictly binary.
    if y.iter().any(|&yi| yi != 0.0 && yi != 1.0) {
        return None;
    }

    let ncomp = ncomp.min(n - 1).min(m);
    let fpca = fdata_to_pc_1d(data, ncomp)?;
    let design = build_design_matrix(&fpca.scores, ncomp, scalar_covariates, n);

    // Zero/non-positive settings select the defaults (25 iters, 1e-6).
    let max_iter = if max_iter == 0 { 25 } else { max_iter };
    let tol = if tol <= 0.0 { 1e-6 } else { tol };

    let (beta, iterations) = irls_loop(&design, y, max_iter, tol);
    Some(build_logistic_result(
        &design, beta, y, fpca, ncomp, m, iterations,
    ))
}
1026
#[cfg(test)]
mod tests {
    use super::*;
    use std::f64::consts::PI;

    /// Builds deterministic synthetic functional data: n sinusoidal
    /// curves on an m-point grid over [0, 1], with a response that is
    /// a linear function of each curve's phase and amplitude (plus a
    /// small deterministic perturbation). Returns (data, y, argvals).
    fn generate_test_data(n: usize, m: usize, seed: u64) -> (FdMatrix, Vec<f64>, Vec<f64>) {
        let t: Vec<f64> = (0..m).map(|j| j as f64 / (m - 1) as f64).collect();
        let mut data = FdMatrix::zeros(n, m);
        let mut y = vec![0.0; n];

        for i in 0..n {
            let phase =
                (seed.wrapping_mul(17).wrapping_add(i as u64 * 31) % 1000) as f64 / 1000.0 * PI;
            let amplitude =
                ((seed.wrapping_mul(13).wrapping_add(i as u64 * 7) % 100) as f64 / 100.0) - 0.5;
            for j in 0..m {
                data[(i, j)] =
                    (2.0 * PI * t[j] + phase).sin() + amplitude * (4.0 * PI * t[j]).cos();
            }
            y[i] = 2.0 * phase + 3.0 * amplitude + 0.05 * (seed.wrapping_add(i as u64) % 10) as f64;
        }
        (data, y, t)
    }

    // --- fregre_lm -------------------------------------------------

    #[test]
    fn test_fregre_lm_basic() {
        let (data, y, _t) = generate_test_data(30, 50, 42);
        let result = fregre_lm(&data, &y, None, 3);
        assert!(result.is_some());
        let fit = result.unwrap();
        assert_eq!(fit.fitted_values.len(), 30);
        assert_eq!(fit.residuals.len(), 30);
        assert_eq!(fit.beta_t.len(), 50);
        assert_eq!(fit.ncomp, 3);
        assert!(fit.r_squared >= 0.0 && fit.r_squared <= 1.0 + 1e-10);
    }

    #[test]
    fn test_fregre_lm_with_scalar_covariates() {
        let (data, y, _t) = generate_test_data(30, 50, 42);
        let mut sc = FdMatrix::zeros(30, 2);
        for i in 0..30 {
            sc[(i, 0)] = i as f64 / 30.0;
            sc[(i, 1)] = (i as f64 * 0.7).sin();
        }
        let result = fregre_lm(&data, &y, Some(&sc), 3);
        assert!(result.is_some());
        let fit = result.unwrap();
        assert_eq!(fit.gamma.len(), 2);
    }

    // With an intercept in the design, OLS residuals are orthogonal to
    // the constant column, so they must sum to ~0.
    #[test]
    fn test_fregre_lm_residuals_sum_near_zero() {
        let (data, y, _t) = generate_test_data(30, 50, 42);
        let fit = fregre_lm(&data, &y, None, 3).unwrap();
        let resid_sum: f64 = fit.residuals.iter().sum::<f64>();
        assert!(
            resid_sum.abs() < 1e-8,
            "Residuals should sum to ~0 with intercept, got {}",
            resid_sum
        );
    }

    #[test]
    fn test_fregre_lm_fitted_plus_residuals_equals_y() {
        let (data, y, _t) = generate_test_data(30, 50, 42);
        let fit = fregre_lm(&data, &y, None, 3).unwrap();
        for i in 0..30 {
            let reconstructed = fit.fitted_values[i] + fit.residuals[i];
            assert!(
                (reconstructed - y[i]).abs() < 1e-10,
                "ŷ + r should equal y at index {}",
                i
            );
        }
    }

    // Nested OLS models: adding regressors can never decrease R².
    #[test]
    fn test_fregre_lm_more_components_higher_r2() {
        let (data, y, _t) = generate_test_data(50, 50, 42);
        let fit1 = fregre_lm(&data, &y, None, 1).unwrap();
        let fit3 = fregre_lm(&data, &y, None, 3).unwrap();
        assert!(
            fit3.r_squared >= fit1.r_squared - 1e-10,
            "More components should give >= R²: {} vs {}",
            fit3.r_squared,
            fit1.r_squared
        );
    }

    #[test]
    fn test_fregre_lm_invalid_input() {
        // Too few samples (n < 3).
        let data = FdMatrix::zeros(2, 50);
        let y = vec![1.0, 2.0];
        assert!(fregre_lm(&data, &y, None, 1).is_none());

        // Response length mismatch.
        let data = FdMatrix::zeros(10, 50);
        let y = vec![1.0; 5];
        assert!(fregre_lm(&data, &y, None, 2).is_none());
    }

    #[test]
    fn test_fregre_lm_std_errors_positive() {
        let (data, y, _t) = generate_test_data(30, 50, 42);
        let fit = fregre_lm(&data, &y, None, 3).unwrap();
        for (i, &se) in fit.std_errors.iter().enumerate() {
            assert!(
                se > 0.0 && se.is_finite(),
                "Std error {} should be positive finite, got {}",
                i,
                se
            );
        }
    }

    // Predicting on the training curves must reproduce the fitted values.
    #[test]
    fn test_predict_fregre_lm_on_training_data() {
        let (data, y, _t) = generate_test_data(30, 50, 42);
        let fit = fregre_lm(&data, &y, None, 3).unwrap();
        let preds = predict_fregre_lm(&fit, &data, None);
        for i in 0..30 {
            assert!(
                (preds[i] - fit.fitted_values[i]).abs() < 1e-6,
                "Prediction on training data should match fitted values"
            );
        }
    }

    // --- fregre_cv -------------------------------------------------

    #[test]
    fn test_fregre_cv_returns_result() {
        let (data, y, _t) = generate_test_data(30, 50, 42);
        let result = fregre_cv(&data, &y, None, 1, 8, 5);
        assert!(result.is_some());
        let cv = result.unwrap();
        assert!(cv.optimal_k >= 1 && cv.optimal_k <= 8);
        assert!(cv.min_cv_error >= 0.0);
    }

    #[test]
    fn test_fregre_cv_with_scalar_covariates() {
        let (data, y, _t) = generate_test_data(30, 50, 42);
        let mut sc = FdMatrix::zeros(30, 1);
        for i in 0..30 {
            sc[(i, 0)] = i as f64;
        }
        let result = fregre_cv(&data, &y, Some(&sc), 1, 5, 3);
        assert!(result.is_some());
    }

    // --- fregre_np_mixed -------------------------------------------

    #[test]
    fn test_fregre_np_mixed_basic() {
        let (data, y, t) = generate_test_data(30, 50, 42);
        // Bandwidths of 0.0 trigger automatic LOO selection.
        let result = fregre_np_mixed(&data, &y, &t, None, 0.0, 0.0);
        assert!(result.is_some());
        let fit = result.unwrap();
        assert_eq!(fit.fitted_values.len(), 30);
        assert!(fit.h_func > 0.0);
        assert!(fit.cv_error >= 0.0);
    }

    #[test]
    fn test_fregre_np_mixed_with_scalars() {
        let (data, y, t) = generate_test_data(30, 50, 42);
        let mut sc = FdMatrix::zeros(30, 1);
        for i in 0..30 {
            sc[(i, 0)] = i as f64 / 30.0;
        }
        let result = fregre_np_mixed(&data, &y, &t, Some(&sc), 0.0, 0.0);
        assert!(result.is_some());
        let fit = result.unwrap();
        assert!(fit.h_scalar > 0.0);
    }

    #[test]
    fn test_fregre_np_mixed_manual_bandwidth() {
        let (data, y, t) = generate_test_data(30, 50, 42);
        let result = fregre_np_mixed(&data, &y, &t, None, 0.5, 0.0);
        assert!(result.is_some());
        let fit = result.unwrap();
        assert!(
            (fit.h_func - 0.5).abs() < 1e-10,
            "Should use provided bandwidth"
        );
    }

    // --- functional_logistic ---------------------------------------

    #[test]
    fn test_functional_logistic_basic() {
        let (data, y_cont, _t) = generate_test_data(30, 50, 42);
        // Binarize the continuous response at its median.
        let y_median = {
            let mut sorted = y_cont.clone();
            sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
            sorted[sorted.len() / 2]
        };
        let y_bin: Vec<f64> = y_cont
            .iter()
            .map(|&v| if v >= y_median { 1.0 } else { 0.0 })
            .collect();

        let result = functional_logistic(&data, &y_bin, None, 3, 25, 1e-6);
        assert!(result.is_some());
        let fit = result.unwrap();
        assert_eq!(fit.probabilities.len(), 30);
        assert_eq!(fit.predicted_classes.len(), 30);
        assert!(fit.accuracy >= 0.0 && fit.accuracy <= 1.0);
        for &p in &fit.probabilities {
            assert!((0.0..=1.0).contains(&p), "Probability out of range: {}", p);
        }
    }

    #[test]
    fn test_functional_logistic_with_scalar_covariates() {
        let (data, y_cont, _t) = generate_test_data(30, 50, 42);
        let y_median = {
            let mut sorted = y_cont.clone();
            sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
            sorted[sorted.len() / 2]
        };
        let y_bin: Vec<f64> = y_cont
            .iter()
            .map(|&v| if v >= y_median { 1.0 } else { 0.0 })
            .collect();

        let mut sc = FdMatrix::zeros(30, 1);
        for i in 0..30 {
            sc[(i, 0)] = i as f64 / 30.0;
        }
        let result = functional_logistic(&data, &y_bin, Some(&sc), 3, 25, 1e-6);
        assert!(result.is_some());
        let fit = result.unwrap();
        assert_eq!(fit.gamma.len(), 1);
    }

    // Non-binary responses must be rejected.
    #[test]
    fn test_functional_logistic_invalid_response() {
        let (data, _, _) = generate_test_data(30, 50, 42);
        let y: Vec<f64> = (0..30).map(|i| i as f64).collect();
        assert!(functional_logistic(&data, &y, None, 3, 25, 1e-6).is_none());
    }

    #[test]
    fn test_functional_logistic_convergence() {
        let (data, y_cont, _t) = generate_test_data(40, 50, 42);
        let y_median = {
            let mut sorted = y_cont.clone();
            sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
            sorted[sorted.len() / 2]
        };
        let y_bin: Vec<f64> = y_cont
            .iter()
            .map(|&v| if v >= y_median { 1.0 } else { 0.0 })
            .collect();

        let fit = functional_logistic(&data, &y_bin, None, 3, 100, 1e-8).unwrap();
        assert!(fit.iterations <= 100, "Should converge within max_iter");
    }

    // --- edge cases and helpers ------------------------------------

    #[test]
    fn test_fregre_lm_single_component() {
        let (data, y, _t) = generate_test_data(20, 50, 42);
        let result = fregre_lm(&data, &y, None, 1);
        assert!(result.is_some());
        let fit = result.unwrap();
        assert_eq!(fit.ncomp, 1);
    }

    // ncomp = 0 triggers CV-based selection inside fregre_lm.
    #[test]
    fn test_fregre_lm_auto_k_selection() {
        let (data, y, _t) = generate_test_data(30, 50, 42);
        let result = fregre_lm(&data, &y, None, 0);
        assert!(result.is_some());
        let fit = result.unwrap();
        assert!(fit.ncomp >= 1);
    }

    #[test]
    fn test_predict_fregre_np_basic() {
        let (data, y, t) = generate_test_data(30, 50, 42);
        let preds = predict_fregre_np(&data, &y, None, &data, None, &t, 0.5, 0.5);
        assert_eq!(preds.len(), 30);
        for &p in &preds {
            assert!(p.is_finite(), "Prediction should be finite");
        }
    }

    #[test]
    fn test_sigmoid_properties() {
        assert!((sigmoid(0.0) - 0.5).abs() < 1e-10);
        assert!(sigmoid(10.0) > 0.999);
        assert!(sigmoid(-10.0) < 0.001);
        // Symmetry: σ(x) + σ(−x) = 1.
        assert!((sigmoid(3.0) + sigmoid(-3.0) - 1.0).abs() < 1e-10);
    }
}