1use crate::error::FdarError;
6use crate::matrix::FdMatrix;
7#[cfg(feature = "linalg")]
8use anofox_regression::solvers::RidgeRegressor;
9#[cfg(feature = "linalg")]
10use anofox_regression::{FittedRegressor, Regressor};
11use nalgebra::SVD;
12
/// Result of a 1-D functional principal component analysis (FPCA).
#[derive(Debug, Clone, PartialEq)]
pub struct FpcaResult {
    /// Leading singular values of the centered data matrix, in the order
    /// returned by the SVD (non-increasing); length is the effective `ncomp`.
    pub singular_values: Vec<f64>,
    /// Principal component loadings, one column per component (`m x ncomp`).
    pub rotation: FdMatrix,
    /// Component scores per observation, `U * Sigma` (`n x ncomp`).
    pub scores: FdMatrix,
    /// Column means subtracted during centering (length `m`).
    pub mean: Vec<f64>,
    /// Column-centered copy of the input data (`n x m`).
    pub centered: FdMatrix,
}
27
28fn center_columns(data: &FdMatrix) -> (FdMatrix, Vec<f64>) {
30 let (n, m) = data.shape();
31 let mut centered = FdMatrix::zeros(n, m);
32 let mut means = vec![0.0; m];
33 for j in 0..m {
34 let col = data.column(j);
35 let mean = col.iter().sum::<f64>() / n as f64;
36 means[j] = mean;
37 let out_col = centered.column_mut(j);
38 for i in 0..n {
39 out_col[i] = col[i] - mean;
40 }
41 }
42 (centered, means)
43}
44
45fn extract_pc_components(
47 svd: &SVD<f64, nalgebra::Dyn, nalgebra::Dyn>,
48 n: usize,
49 m: usize,
50 ncomp: usize,
51) -> Option<(Vec<f64>, FdMatrix, FdMatrix)> {
52 let singular_values: Vec<f64> = svd.singular_values.iter().take(ncomp).copied().collect();
53
54 let v_t = svd.v_t.as_ref()?;
55 let mut rotation = FdMatrix::zeros(m, ncomp);
56 for k in 0..ncomp {
57 for j in 0..m {
58 rotation[(j, k)] = v_t[(k, j)];
59 }
60 }
61
62 let u = svd.u.as_ref()?;
63 let mut scores = FdMatrix::zeros(n, ncomp);
64 for k in 0..ncomp {
65 let sv_k = singular_values[k];
66 for i in 0..n {
67 scores[(i, k)] = u[(i, k)] * sv_k;
68 }
69 }
70
71 Some((singular_values, rotation, scores))
72}
73
74#[must_use = "expensive computation whose result should not be discarded"]
87pub fn fdata_to_pc_1d(data: &FdMatrix, ncomp: usize) -> Result<FpcaResult, FdarError> {
88 let (n, m) = data.shape();
89 if n == 0 {
90 return Err(FdarError::InvalidDimension {
91 parameter: "data",
92 expected: "n > 0 rows".to_string(),
93 actual: format!("n = {n}"),
94 });
95 }
96 if m == 0 {
97 return Err(FdarError::InvalidDimension {
98 parameter: "data",
99 expected: "m > 0 columns".to_string(),
100 actual: format!("m = {m}"),
101 });
102 }
103 if ncomp < 1 {
104 return Err(FdarError::InvalidParameter {
105 parameter: "ncomp",
106 message: format!("ncomp must be >= 1, got {ncomp}"),
107 });
108 }
109
110 let ncomp = ncomp.min(n).min(m);
111 let (centered, means) = center_columns(data);
112 let svd = SVD::new(centered.to_dmatrix(), true, true);
113 let (singular_values, rotation, scores) =
114 extract_pc_components(&svd, n, m, ncomp).ok_or_else(|| FdarError::ComputationFailed {
115 operation: "SVD",
116 detail: "failed to extract U or V_t from SVD decomposition".to_string(),
117 })?;
118
119 Ok(FpcaResult {
120 singular_values,
121 rotation,
122 scores,
123 mean: means,
124 centered,
125 })
126}
127
/// Result of a 1-D functional partial least squares (PLS) fit.
#[derive(Debug, Clone, PartialEq)]
pub struct PlsResult {
    /// Weight vectors, one column per component (`m x ncomp`).
    pub weights: FdMatrix,
    /// Score vectors `t_k = X_k w_k`, one column per component (`n x ncomp`).
    pub scores: FdMatrix,
    /// Loading vectors used for deflation, one column per component (`m x ncomp`).
    pub loadings: FdMatrix,
}
138
139fn pls_compute_weights(x_cen: &FdMatrix, y_cen: &[f64]) -> Vec<f64> {
141 let (n, m) = x_cen.shape();
142 let mut w: Vec<f64> = (0..m)
143 .map(|j| {
144 let mut sum = 0.0;
145 for i in 0..n {
146 sum += x_cen[(i, j)] * y_cen[i];
147 }
148 sum
149 })
150 .collect();
151
152 let w_norm: f64 = w.iter().map(|&wi| wi * wi).sum::<f64>().sqrt();
153 if w_norm > 1e-10 {
154 for wi in &mut w {
155 *wi /= w_norm;
156 }
157 }
158 w
159}
160
161fn pls_compute_scores(x_cen: &FdMatrix, w: &[f64]) -> Vec<f64> {
163 let (n, m) = x_cen.shape();
164 (0..n)
165 .map(|i| {
166 let mut sum = 0.0;
167 for j in 0..m {
168 sum += x_cen[(i, j)] * w[j];
169 }
170 sum
171 })
172 .collect()
173}
174
175fn pls_compute_loadings(x_cen: &FdMatrix, t: &[f64], t_norm_sq: f64) -> Vec<f64> {
177 let (n, m) = x_cen.shape();
178 (0..m)
179 .map(|j| {
180 let mut sum = 0.0;
181 for i in 0..n {
182 sum += x_cen[(i, j)] * t[i];
183 }
184 sum / t_norm_sq.max(1e-10)
185 })
186 .collect()
187}
188
189fn pls_deflate_x(x_cen: &mut FdMatrix, t: &[f64], p: &[f64]) {
191 let (n, m) = x_cen.shape();
192 for j in 0..m {
193 for i in 0..n {
194 x_cen[(i, j)] -= t[i] * p[j];
195 }
196 }
197}
198
199fn pls_nipals_step(
201 k: usize,
202 x_cen: &mut FdMatrix,
203 y_cen: &mut [f64],
204 weights: &mut FdMatrix,
205 scores: &mut FdMatrix,
206 loadings: &mut FdMatrix,
207) {
208 let n = x_cen.nrows();
209 let m = x_cen.ncols();
210
211 let w = pls_compute_weights(x_cen, y_cen);
212 let t = pls_compute_scores(x_cen, &w);
213 let t_norm_sq: f64 = t.iter().map(|&ti| ti * ti).sum();
214 let p = pls_compute_loadings(x_cen, &t, t_norm_sq);
215
216 for j in 0..m {
217 weights[(j, k)] = w[j];
218 loadings[(j, k)] = p[j];
219 }
220 for i in 0..n {
221 scores[(i, k)] = t[i];
222 }
223
224 pls_deflate_x(x_cen, &t, &p);
225 let t_y: f64 = t.iter().zip(y_cen.iter()).map(|(&ti, &yi)| ti * yi).sum();
226 let q = t_y / t_norm_sq.max(1e-10);
227 for i in 0..n {
228 y_cen[i] -= t[i] * q;
229 }
230}
231
232#[must_use = "expensive computation whose result should not be discarded"]
245pub fn fdata_to_pls_1d(data: &FdMatrix, y: &[f64], ncomp: usize) -> Result<PlsResult, FdarError> {
246 let (n, m) = data.shape();
247 if n == 0 {
248 return Err(FdarError::InvalidDimension {
249 parameter: "data",
250 expected: "n > 0 rows".to_string(),
251 actual: format!("n = {n}"),
252 });
253 }
254 if m == 0 {
255 return Err(FdarError::InvalidDimension {
256 parameter: "data",
257 expected: "m > 0 columns".to_string(),
258 actual: format!("m = {m}"),
259 });
260 }
261 if y.len() != n {
262 return Err(FdarError::InvalidDimension {
263 parameter: "y",
264 expected: format!("length {n}"),
265 actual: format!("length {}", y.len()),
266 });
267 }
268 if ncomp < 1 {
269 return Err(FdarError::InvalidParameter {
270 parameter: "ncomp",
271 message: format!("ncomp must be >= 1, got {ncomp}"),
272 });
273 }
274
275 let ncomp = ncomp.min(n).min(m);
276
277 let x_means: Vec<f64> = (0..m)
279 .map(|j| {
280 let col = data.column(j);
281 let sum: f64 = col.iter().sum();
282 sum / n as f64
283 })
284 .collect();
285
286 let y_mean: f64 = y.iter().sum::<f64>() / n as f64;
287
288 let mut x_cen = FdMatrix::zeros(n, m);
289 for j in 0..m {
290 for i in 0..n {
291 x_cen[(i, j)] = data[(i, j)] - x_means[j];
292 }
293 }
294
295 let mut y_cen: Vec<f64> = y.iter().map(|&yi| yi - y_mean).collect();
296
297 let mut weights = FdMatrix::zeros(m, ncomp);
298 let mut scores = FdMatrix::zeros(n, ncomp);
299 let mut loadings = FdMatrix::zeros(m, ncomp);
300
301 for k in 0..ncomp {
303 pls_nipals_step(
304 k,
305 &mut x_cen,
306 &mut y_cen,
307 &mut weights,
308 &mut scores,
309 &mut loadings,
310 );
311 }
312
313 Ok(PlsResult {
314 weights,
315 scores,
316 loadings,
317 })
318}
319
/// Result of a ridge regression fit. Failures are reported through the
/// `error` field (with empty outputs) rather than via `Result`.
#[derive(Debug, Clone)]
#[cfg(feature = "linalg")]
pub struct RidgeResult {
    /// Estimated regression coefficients (length `m`; empty on failure).
    pub coefficients: Vec<f64>,
    /// Estimated intercept; 0.0 when fitted without one or on failure.
    pub intercept: f64,
    /// Predictions on the training rows (length `n`; empty on failure).
    pub fitted_values: Vec<f64>,
    /// `y - fitted_values`, element-wise (length `n`; empty on failure).
    pub residuals: Vec<f64>,
    /// Coefficient of determination; 0.0 when `y` has zero variance.
    pub r_squared: f64,
    /// The regularization penalty this fit was run with (echoed back).
    pub lambda: f64,
    /// `Some(message)` when the fit failed, `None` on success.
    pub error: Option<String>,
}
339
/// Fits a ridge regression of `y` on the columns of `x` with penalty
/// `lambda`, optionally including an intercept.
///
/// Unlike the FPCA/PLS entry points this function never returns `Err`;
/// invalid dimensions or solver failures are reported through
/// [`RidgeResult::error`] with empty coefficient/fit vectors.
#[cfg(feature = "linalg")]
#[must_use = "expensive computation whose result should not be discarded"]
pub fn ridge_regression_fit(
    x: &FdMatrix,
    y: &[f64],
    lambda: f64,
    with_intercept: bool,
) -> RidgeResult {
    // Single constructor for the failure path; the original duplicated this
    // struct literal at every early return.
    let failure = |message: String| RidgeResult {
        coefficients: Vec::new(),
        intercept: 0.0,
        fitted_values: Vec::new(),
        residuals: Vec::new(),
        r_squared: 0.0,
        lambda,
        error: Some(message),
    };

    let (n, m) = x.shape();
    if n == 0 || m == 0 || y.len() != n {
        return failure("Invalid input dimensions".to_string());
    }

    // Convert to the faer types the solver expects.
    let x_faer = faer::Mat::from_fn(n, m, |i, j| x[(i, j)]);
    let y_faer = faer::Col::from_fn(n, |i| y[i]);

    let regressor = RidgeRegressor::builder()
        .with_intercept(with_intercept)
        .lambda(lambda)
        .build();

    let fitted = match regressor.fit(&x_faer, &y_faer) {
        Ok(f) => f,
        Err(e) => return failure(format!("Fit failed: {e:?}")),
    };

    let coefs = fitted.coefficients();
    let coefficients: Vec<f64> = (0..coefs.nrows()).map(|i| coefs[i]).collect();

    // An absent intercept (with_intercept == false) is reported as 0.0.
    let intercept = fitted.intercept().unwrap_or(0.0);

    // In-sample predictions: x_i . beta + intercept.
    let mut fitted_values = vec![0.0; n];
    for i in 0..n {
        let mut pred = intercept;
        for j in 0..m {
            pred += x[(i, j)] * coefficients[j];
        }
        fitted_values[i] = pred;
    }

    let residuals: Vec<f64> = y
        .iter()
        .zip(fitted_values.iter())
        .map(|(&yi, &yhat)| yi - yhat)
        .collect();

    // R^2 = 1 - SS_res / SS_tot, defined as 0.0 for constant y (SS_tot == 0).
    let y_mean: f64 = y.iter().sum::<f64>() / n as f64;
    let ss_tot: f64 = y.iter().map(|&yi| (yi - y_mean).powi(2)).sum();
    let ss_res: f64 = residuals.iter().map(|&r| r.powi(2)).sum();
    let r_squared = if ss_tot > 0.0 {
        1.0 - ss_res / ss_tot
    } else {
        0.0
    };

    RidgeResult {
        coefficients,
        intercept,
        fitted_values,
        residuals,
        r_squared,
        lambda,
        error: None,
    }
}
437
438#[cfg(test)]
439mod tests {
440 use super::*;
441 use std::f64::consts::PI;
442
443 fn generate_test_fdata(n: usize, m: usize) -> (FdMatrix, Vec<f64>) {
445 let t: Vec<f64> = (0..m).map(|j| j as f64 / (m - 1) as f64).collect();
446
447 let mut data = FdMatrix::zeros(n, m);
449 for i in 0..n {
450 let phase = (i as f64 / n as f64) * PI;
451 for j in 0..m {
452 data[(i, j)] = (2.0 * PI * t[j] + phase).sin();
453 }
454 }
455
456 (data, t)
457 }
458
459 #[test]
462 fn test_fdata_to_pc_1d_basic() {
463 let n = 20;
464 let m = 50;
465 let ncomp = 3;
466 let (data, _) = generate_test_fdata(n, m);
467
468 let result = fdata_to_pc_1d(&data, ncomp);
469 assert!(result.is_ok());
470
471 let fpca = result.unwrap();
472 assert_eq!(fpca.singular_values.len(), ncomp);
473 assert_eq!(fpca.rotation.shape(), (m, ncomp));
474 assert_eq!(fpca.scores.shape(), (n, ncomp));
475 assert_eq!(fpca.mean.len(), m);
476 assert_eq!(fpca.centered.shape(), (n, m));
477 }
478
479 #[test]
480 fn test_fdata_to_pc_1d_singular_values_decreasing() {
481 let n = 20;
482 let m = 50;
483 let ncomp = 5;
484 let (data, _) = generate_test_fdata(n, m);
485
486 let fpca = fdata_to_pc_1d(&data, ncomp).unwrap();
487
488 for i in 1..fpca.singular_values.len() {
490 assert!(
491 fpca.singular_values[i] <= fpca.singular_values[i - 1] + 1e-10,
492 "Singular values should be decreasing"
493 );
494 }
495 }
496
497 #[test]
498 fn test_fdata_to_pc_1d_centered_has_zero_mean() {
499 let n = 20;
500 let m = 50;
501 let (data, _) = generate_test_fdata(n, m);
502
503 let fpca = fdata_to_pc_1d(&data, 3).unwrap();
504
505 for j in 0..m {
507 let col_mean: f64 = (0..n).map(|i| fpca.centered[(i, j)]).sum::<f64>() / n as f64;
508 assert!(
509 col_mean.abs() < 1e-10,
510 "Centered data should have zero column mean"
511 );
512 }
513 }
514
515 #[test]
516 fn test_fdata_to_pc_1d_ncomp_limits() {
517 let n = 10;
518 let m = 50;
519 let (data, _) = generate_test_fdata(n, m);
520
521 let fpca = fdata_to_pc_1d(&data, 20).unwrap();
523 assert!(fpca.singular_values.len() <= n);
524 }
525
526 #[test]
527 fn test_fdata_to_pc_1d_invalid_input() {
528 let empty = FdMatrix::zeros(0, 50);
530 let result = fdata_to_pc_1d(&empty, 3);
531 assert!(result.is_err());
532
533 let (data, _) = generate_test_fdata(10, 50);
535 let result = fdata_to_pc_1d(&data, 0);
536 assert!(result.is_err());
537 }
538
539 #[test]
540 fn test_fdata_to_pc_1d_reconstruction() {
541 let n = 10;
542 let m = 30;
543 let (data, _) = generate_test_fdata(n, m);
544
545 let ncomp = n.min(m);
547 let fpca = fdata_to_pc_1d(&data, ncomp).unwrap();
548
549 for i in 0..n {
551 for j in 0..m {
552 let mut reconstructed = 0.0;
553 for k in 0..ncomp {
554 let score = fpca.scores[(i, k)];
555 let loading = fpca.rotation[(j, k)];
556 reconstructed += score * loading;
557 }
558 let original_centered = fpca.centered[(i, j)];
559 assert!(
560 (reconstructed - original_centered).abs() < 0.1,
561 "Reconstruction error at ({}, {}): {} vs {}",
562 i,
563 j,
564 reconstructed,
565 original_centered
566 );
567 }
568 }
569 }
570
571 #[test]
574 fn test_fdata_to_pls_1d_basic() {
575 let n = 20;
576 let m = 30;
577 let ncomp = 3;
578 let (x, _) = generate_test_fdata(n, m);
579
580 let y: Vec<f64> = (0..n).map(|i| (i as f64 / n as f64) + 0.1).collect();
582
583 let result = fdata_to_pls_1d(&x, &y, ncomp);
584 assert!(result.is_ok());
585
586 let pls = result.unwrap();
587 assert_eq!(pls.weights.shape(), (m, ncomp));
588 assert_eq!(pls.scores.shape(), (n, ncomp));
589 assert_eq!(pls.loadings.shape(), (m, ncomp));
590 }
591
592 #[test]
593 fn test_fdata_to_pls_1d_weights_normalized() {
594 let n = 20;
595 let m = 30;
596 let ncomp = 2;
597 let (x, _) = generate_test_fdata(n, m);
598 let y: Vec<f64> = (0..n).map(|i| i as f64).collect();
599
600 let pls = fdata_to_pls_1d(&x, &y, ncomp).unwrap();
601
602 for k in 0..ncomp {
604 let norm: f64 = (0..m)
605 .map(|j| pls.weights[(j, k)].powi(2))
606 .sum::<f64>()
607 .sqrt();
608 assert!(
609 (norm - 1.0).abs() < 0.1,
610 "Weight vector {} should be unit norm, got {}",
611 k,
612 norm
613 );
614 }
615 }
616
617 #[test]
618 fn test_fdata_to_pls_1d_invalid_input() {
619 let (x, _) = generate_test_fdata(10, 30);
620
621 let result = fdata_to_pls_1d(&x, &[0.0; 5], 2);
623 assert!(result.is_err());
624
625 let y = vec![0.0; 10];
627 let result = fdata_to_pls_1d(&x, &y, 0);
628 assert!(result.is_err());
629 }
630
631 #[cfg(feature = "linalg")]
634 #[test]
635 fn test_ridge_regression_fit_basic() {
636 let n = 50;
637 let m = 5;
638
639 let mut x = FdMatrix::zeros(n, m);
641 for i in 0..n {
642 for j in 0..m {
643 x[(i, j)] = (i as f64 + j as f64) / (n + m) as f64;
644 }
645 }
646
647 let y: Vec<f64> = (0..n)
649 .map(|i| {
650 let mut sum = 0.0;
651 for j in 0..m {
652 sum += x[(i, j)];
653 }
654 sum + 0.01 * (i as f64 % 10.0)
655 })
656 .collect();
657
658 let result = ridge_regression_fit(&x, &y, 0.1, true);
659
660 assert!(result.error.is_none(), "Ridge should fit without error");
661 assert_eq!(result.coefficients.len(), m);
662 assert_eq!(result.fitted_values.len(), n);
663 assert_eq!(result.residuals.len(), n);
664 }
665
666 #[cfg(feature = "linalg")]
667 #[test]
668 fn test_ridge_regression_fit_r_squared() {
669 let n = 50;
670 let m = 3;
671
672 let x = FdMatrix::from_column_major(
673 (0..n * m).map(|i| i as f64 / (n * m) as f64).collect(),
674 n,
675 m,
676 )
677 .unwrap();
678 let y: Vec<f64> = (0..n).map(|i| i as f64 / n as f64).collect();
679
680 let result = ridge_regression_fit(&x, &y, 0.01, true);
681
682 assert!(
683 result.r_squared > 0.5,
684 "R-squared should be high, got {}",
685 result.r_squared
686 );
687 assert!(result.r_squared <= 1.0 + 1e-10, "R-squared should be <= 1");
688 }
689
690 #[cfg(feature = "linalg")]
691 #[test]
692 fn test_ridge_regression_fit_regularization() {
693 let n = 30;
694 let m = 10;
695
696 let x = FdMatrix::from_column_major(
697 (0..n * m)
698 .map(|i| ((i * 17) % 100) as f64 / 100.0)
699 .collect(),
700 n,
701 m,
702 )
703 .unwrap();
704 let y: Vec<f64> = (0..n).map(|i| (i as f64).sin()).collect();
705
706 let low_lambda = ridge_regression_fit(&x, &y, 0.001, true);
707 let high_lambda = ridge_regression_fit(&x, &y, 100.0, true);
708
709 let norm_low: f64 = low_lambda
710 .coefficients
711 .iter()
712 .map(|c| c.powi(2))
713 .sum::<f64>()
714 .sqrt();
715 let norm_high: f64 = high_lambda
716 .coefficients
717 .iter()
718 .map(|c| c.powi(2))
719 .sum::<f64>()
720 .sqrt();
721
722 assert!(
723 norm_high <= norm_low + 1e-6,
724 "Higher lambda should shrink coefficients: {} vs {}",
725 norm_high,
726 norm_low
727 );
728 }
729
730 #[cfg(feature = "linalg")]
731 #[test]
732 fn test_ridge_regression_fit_residuals() {
733 let n = 20;
734 let m = 3;
735
736 let x = FdMatrix::from_column_major(
737 (0..n * m).map(|i| i as f64 / (n * m) as f64).collect(),
738 n,
739 m,
740 )
741 .unwrap();
742 let y: Vec<f64> = (0..n).map(|i| i as f64 / n as f64).collect();
743
744 let result = ridge_regression_fit(&x, &y, 0.1, true);
745
746 for i in 0..n {
747 let expected_resid = y[i] - result.fitted_values[i];
748 assert!(
749 (result.residuals[i] - expected_resid).abs() < 1e-10,
750 "Residual mismatch at {}",
751 i
752 );
753 }
754 }
755
756 #[cfg(feature = "linalg")]
757 #[test]
758 fn test_ridge_regression_fit_no_intercept() {
759 let n = 30;
760 let m = 5;
761
762 let x = FdMatrix::from_column_major(
763 (0..n * m).map(|i| i as f64 / (n * m) as f64).collect(),
764 n,
765 m,
766 )
767 .unwrap();
768 let y: Vec<f64> = (0..n).map(|i| i as f64 / n as f64).collect();
769
770 let result = ridge_regression_fit(&x, &y, 0.1, false);
771
772 assert!(result.error.is_none());
773 assert!(
774 result.intercept.abs() < 1e-10,
775 "Intercept should be 0, got {}",
776 result.intercept
777 );
778 }
779
780 #[cfg(feature = "linalg")]
781 #[test]
782 fn test_ridge_regression_fit_invalid_input() {
783 let empty = FdMatrix::zeros(0, 5);
784 let result = ridge_regression_fit(&empty, &[], 0.1, true);
785 assert!(result.error.is_some());
786
787 let x = FdMatrix::zeros(10, 10);
788 let y = vec![0.0; 5];
789 let result = ridge_regression_fit(&x, &y, 0.1, true);
790 assert!(result.error.is_some());
791 }
792
793 #[test]
794 fn test_all_zero_fpca() {
795 let n = 5;
797 let m = 20;
798 let data = FdMatrix::zeros(n, m);
799 let result = fdata_to_pc_1d(&data, 2);
800 if let Ok(res) = result {
802 assert_eq!(res.scores.nrows(), n);
803 for &sv in &res.singular_values {
804 assert!(
805 sv.abs() < 1e-10,
806 "All-zero data should have zero singular values"
807 );
808 }
809 }
810 }
811
812 #[test]
813 fn test_n1_pca() {
814 let data = FdMatrix::from_column_major(vec![1.0, 2.0, 3.0], 1, 3).unwrap();
816 let result = fdata_to_pc_1d(&data, 1);
817 let _ = result;
820 }
821
    /// A constant (zero-variance) response must not panic in PLS; only
    /// completion is checked, not the returned value.
    #[test]
    fn test_constant_y_pls() {
        let n = 10;
        let m = 20;
        let data_vec: Vec<f64> = (0..n * m).map(|i| (i as f64 * 0.1).sin()).collect();
        let data = FdMatrix::from_column_major(data_vec, n, m).unwrap();
        let y = vec![5.0; n]; let result = fdata_to_pls_1d(&data, &y, 2);
        let _ = result;
833 }
834}