1use crate::matrix::FdMatrix;
6#[cfg(feature = "linalg")]
7use anofox_regression::solvers::RidgeRegressor;
8#[cfg(feature = "linalg")]
9use anofox_regression::{FittedRegressor, Regressor};
10use nalgebra::SVD;
11
/// Output of a functional principal component analysis (see `fdata_to_pc_1d`).
pub struct FpcaResult {
 /// Leading singular values of the centered data matrix, largest first.
 pub singular_values: Vec<f64>,
 /// m x ncomp matrix of principal directions (right singular vectors of the centered data).
 pub rotation: FdMatrix,
 /// n x ncomp matrix of component scores (columns of U scaled by the matching singular value).
 pub scores: FdMatrix,
 /// Per-column means removed during centering (length m).
 pub mean: Vec<f64>,
 /// The column-centered data matrix the SVD was computed from.
 pub centered: FdMatrix,
}
25
26fn center_columns(data: &FdMatrix) -> (FdMatrix, Vec<f64>) {
28 let (n, m) = data.shape();
29 let mut centered = FdMatrix::zeros(n, m);
30 let mut means = vec![0.0; m];
31 for j in 0..m {
32 let col = data.column(j);
33 let mean = col.iter().sum::<f64>() / n as f64;
34 means[j] = mean;
35 let out_col = centered.column_mut(j);
36 for i in 0..n {
37 out_col[i] = col[i] - mean;
38 }
39 }
40 (centered, means)
41}
42
43fn extract_pc_components(
45 svd: &SVD<f64, nalgebra::Dyn, nalgebra::Dyn>,
46 n: usize,
47 m: usize,
48 ncomp: usize,
49) -> Option<(Vec<f64>, FdMatrix, FdMatrix)> {
50 let singular_values: Vec<f64> = svd.singular_values.iter().take(ncomp).cloned().collect();
51
52 let v_t = svd.v_t.as_ref()?;
53 let mut rotation = FdMatrix::zeros(m, ncomp);
54 for k in 0..ncomp {
55 for j in 0..m {
56 rotation[(j, k)] = v_t[(k, j)];
57 }
58 }
59
60 let u = svd.u.as_ref()?;
61 let mut scores = FdMatrix::zeros(n, ncomp);
62 for k in 0..ncomp {
63 let sv_k = singular_values[k];
64 for i in 0..n {
65 scores[(i, k)] = u[(i, k)] * sv_k;
66 }
67 }
68
69 Some((singular_values, rotation, scores))
70}
71
72pub fn fdata_to_pc_1d(data: &FdMatrix, ncomp: usize) -> Option<FpcaResult> {
78 let (n, m) = data.shape();
79 if n == 0 || m == 0 || ncomp < 1 {
80 return None;
81 }
82
83 let ncomp = ncomp.min(n).min(m);
84 let (centered, means) = center_columns(data);
85 let svd = SVD::new(centered.to_dmatrix(), true, true);
86 let (singular_values, rotation, scores) = extract_pc_components(&svd, n, m, ncomp)?;
87
88 Some(FpcaResult {
89 singular_values,
90 rotation,
91 scores,
92 mean: means,
93 centered,
94 })
95}
96
/// Output of the PLS1/NIPALS decomposition (see `fdata_to_pls_1d`).
pub struct PlsResult {
 /// m x ncomp matrix of weight vectors (normalized to unit length unless degenerate).
 pub weights: FdMatrix,
 /// n x ncomp matrix of latent scores, one column per extracted component.
 pub scores: FdMatrix,
 /// m x ncomp matrix of X-loadings used for deflation.
 pub loadings: FdMatrix,
}
106
107fn pls_compute_weights(x_cen: &FdMatrix, y_cen: &[f64]) -> Vec<f64> {
109 let (n, m) = x_cen.shape();
110 let mut w: Vec<f64> = (0..m)
111 .map(|j| {
112 let mut sum = 0.0;
113 for i in 0..n {
114 sum += x_cen[(i, j)] * y_cen[i];
115 }
116 sum
117 })
118 .collect();
119
120 let w_norm: f64 = w.iter().map(|&wi| wi * wi).sum::<f64>().sqrt();
121 if w_norm > 1e-10 {
122 for wi in &mut w {
123 *wi /= w_norm;
124 }
125 }
126 w
127}
128
129fn pls_compute_scores(x_cen: &FdMatrix, w: &[f64]) -> Vec<f64> {
131 let (n, m) = x_cen.shape();
132 (0..n)
133 .map(|i| {
134 let mut sum = 0.0;
135 for j in 0..m {
136 sum += x_cen[(i, j)] * w[j];
137 }
138 sum
139 })
140 .collect()
141}
142
143fn pls_compute_loadings(x_cen: &FdMatrix, t: &[f64], t_norm_sq: f64) -> Vec<f64> {
145 let (n, m) = x_cen.shape();
146 (0..m)
147 .map(|j| {
148 let mut sum = 0.0;
149 for i in 0..n {
150 sum += x_cen[(i, j)] * t[i];
151 }
152 sum / t_norm_sq.max(1e-10)
153 })
154 .collect()
155}
156
157fn pls_deflate_x(x_cen: &mut FdMatrix, t: &[f64], p: &[f64]) {
159 let (n, m) = x_cen.shape();
160 for j in 0..m {
161 for i in 0..n {
162 x_cen[(i, j)] -= t[i] * p[j];
163 }
164 }
165}
166
167fn pls_nipals_step(
169 k: usize,
170 x_cen: &mut FdMatrix,
171 y_cen: &mut [f64],
172 weights: &mut FdMatrix,
173 scores: &mut FdMatrix,
174 loadings: &mut FdMatrix,
175) {
176 let n = x_cen.nrows();
177 let m = x_cen.ncols();
178
179 let w = pls_compute_weights(x_cen, y_cen);
180 let t = pls_compute_scores(x_cen, &w);
181 let t_norm_sq: f64 = t.iter().map(|&ti| ti * ti).sum();
182 let p = pls_compute_loadings(x_cen, &t, t_norm_sq);
183
184 for j in 0..m {
185 weights[(j, k)] = w[j];
186 loadings[(j, k)] = p[j];
187 }
188 for i in 0..n {
189 scores[(i, k)] = t[i];
190 }
191
192 pls_deflate_x(x_cen, &t, &p);
193 let t_y: f64 = t.iter().zip(y_cen.iter()).map(|(&ti, &yi)| ti * yi).sum();
194 let q = t_y / t_norm_sq.max(1e-10);
195 for i in 0..n {
196 y_cen[i] -= t[i] * q;
197 }
198}
199
200pub fn fdata_to_pls_1d(data: &FdMatrix, y: &[f64], ncomp: usize) -> Option<PlsResult> {
207 let (n, m) = data.shape();
208 if n == 0 || m == 0 || y.len() != n || ncomp < 1 {
209 return None;
210 }
211
212 let ncomp = ncomp.min(n).min(m);
213
214 let x_means: Vec<f64> = (0..m)
216 .map(|j| {
217 let col = data.column(j);
218 let sum: f64 = col.iter().sum();
219 sum / n as f64
220 })
221 .collect();
222
223 let y_mean: f64 = y.iter().sum::<f64>() / n as f64;
224
225 let mut x_cen = FdMatrix::zeros(n, m);
226 for j in 0..m {
227 for i in 0..n {
228 x_cen[(i, j)] = data[(i, j)] - x_means[j];
229 }
230 }
231
232 let mut y_cen: Vec<f64> = y.iter().map(|&yi| yi - y_mean).collect();
233
234 let mut weights = FdMatrix::zeros(m, ncomp);
235 let mut scores = FdMatrix::zeros(n, ncomp);
236 let mut loadings = FdMatrix::zeros(m, ncomp);
237
238 for k in 0..ncomp {
240 pls_nipals_step(
241 k,
242 &mut x_cen,
243 &mut y_cen,
244 &mut weights,
245 &mut scores,
246 &mut loadings,
247 );
248 }
249
250 Some(PlsResult {
251 weights,
252 scores,
253 loadings,
254 })
255}
256
#[cfg(feature = "linalg")]
/// Output of `ridge_regression_fit`. On failure only `lambda` and `error`
/// carry information; all other fields are empty/zeroed.
pub struct RidgeResult {
 /// Fitted coefficients, one per column of the design matrix.
 pub coefficients: Vec<f64>,
 /// Fitted intercept; 0.0 when the solver reports none (e.g. fit without intercept).
 pub intercept: f64,
 /// In-sample predictions, one per observation.
 pub fitted_values: Vec<f64>,
 /// y - fitted_values, one per observation.
 pub residuals: Vec<f64>,
 /// Coefficient of determination (set to 0.0 when y is constant).
 pub r_squared: f64,
 /// The ridge penalty the model was fit with.
 pub lambda: f64,
 /// Error description when fitting failed; `None` on success.
 pub error: Option<String>,
}
275
#[cfg(feature = "linalg")]
/// Builds the `RidgeResult` returned on every failure path: empty vectors,
/// zeroed statistics, and the supplied error message. Extracted to avoid
/// duplicating the struct literal at each early return.
fn ridge_failure(lambda: f64, message: String) -> RidgeResult {
 RidgeResult {
 coefficients: Vec::new(),
 intercept: 0.0,
 fitted_values: Vec::new(),
 residuals: Vec::new(),
 r_squared: 0.0,
 lambda,
 error: Some(message),
 }
}

#[cfg(feature = "linalg")]
/// Fits a ridge (L2-penalized) linear regression of `y` on the columns of `x`.
///
/// * `x` — n x m design matrix.
/// * `y` — response vector of length n.
/// * `lambda` — ridge penalty passed to the solver.
/// * `with_intercept` — whether the model includes an intercept term.
///
/// Never panics: dimension mismatches and solver failures are reported via
/// `RidgeResult::error` instead of `Err`/panic.
pub fn ridge_regression_fit(
 x: &FdMatrix,
 y: &[f64],
 lambda: f64,
 with_intercept: bool,
) -> RidgeResult {
 let (n, m) = x.shape();
 if n == 0 || m == 0 || y.len() != n {
 return ridge_failure(lambda, "Invalid input dimensions".to_string());
 }

 // Hand the data to the solver in its native (faer) types.
 let x_faer = faer::Mat::from_fn(n, m, |i, j| x[(i, j)]);
 let y_faer = faer::Col::from_fn(n, |i| y[i]);

 let regressor = RidgeRegressor::builder()
 .with_intercept(with_intercept)
 .lambda(lambda)
 .build();

 let fitted = match regressor.fit(&x_faer, &y_faer) {
 Ok(f) => f,
 Err(e) => return ridge_failure(lambda, format!("Fit failed: {:?}", e)),
 };

 let coefs = fitted.coefficients();
 let coefficients: Vec<f64> = (0..coefs.nrows()).map(|i| coefs[i]).collect();

 let intercept = fitted.intercept().unwrap_or(0.0);

 // Recompute predictions from the caller's matrix so fitted_values and
 // residuals line up with the input data.
 let mut fitted_values = vec![0.0; n];
 for i in 0..n {
 let mut pred = intercept;
 for j in 0..m {
 pred += x[(i, j)] * coefficients[j];
 }
 fitted_values[i] = pred;
 }

 let residuals: Vec<f64> = y
 .iter()
 .zip(fitted_values.iter())
 .map(|(&yi, &yhat)| yi - yhat)
 .collect();

 // R^2 = 1 - SS_res / SS_tot; defined as 0.0 when y has no variance.
 let y_mean: f64 = y.iter().sum::<f64>() / n as f64;
 let ss_tot: f64 = y.iter().map(|&yi| (yi - y_mean).powi(2)).sum();
 let ss_res: f64 = residuals.iter().map(|&r| r.powi(2)).sum();
 let r_squared = if ss_tot > 0.0 {
 1.0 - ss_res / ss_tot
 } else {
 0.0
 };

 RidgeResult {
 coefficients,
 intercept,
 fitted_values,
 residuals,
 r_squared,
 lambda,
 error: None,
 }
}
372
#[cfg(test)]
mod tests {
 use super::*;
 use std::f64::consts::PI;

 /// Builds an n x m matrix of phase-shifted sine curves sampled on [0, 1],
 /// plus the sample grid itself.
 fn generate_test_fdata(n: usize, m: usize) -> (FdMatrix, Vec<f64>) {
 let t: Vec<f64> = (0..m).map(|j| j as f64 / (m - 1) as f64).collect();

 let mut data = FdMatrix::zeros(n, m);
 for i in 0..n {
 let phase = (i as f64 / n as f64) * PI;
 for j in 0..m {
 data[(i, j)] = (2.0 * PI * t[j] + phase).sin();
 }
 }

 (data, t)
 }

 // FPCA returns matrices of the documented shapes.
 #[test]
 fn test_fdata_to_pc_1d_basic() {
 let n = 20;
 let m = 50;
 let ncomp = 3;
 let (data, _) = generate_test_fdata(n, m);

 let result = fdata_to_pc_1d(&data, ncomp);
 assert!(result.is_some());

 let fpca = result.unwrap();
 assert_eq!(fpca.singular_values.len(), ncomp);
 assert_eq!(fpca.rotation.shape(), (m, ncomp));
 assert_eq!(fpca.scores.shape(), (n, ncomp));
 assert_eq!(fpca.mean.len(), m);
 assert_eq!(fpca.centered.shape(), (n, m));
 }

 // SVD singular values come back in non-increasing order.
 #[test]
 fn test_fdata_to_pc_1d_singular_values_decreasing() {
 let n = 20;
 let m = 50;
 let ncomp = 5;
 let (data, _) = generate_test_fdata(n, m);

 let fpca = fdata_to_pc_1d(&data, ncomp).unwrap();

 for i in 1..fpca.singular_values.len() {
 assert!(
 fpca.singular_values[i] <= fpca.singular_values[i - 1] + 1e-10,
 "Singular values should be decreasing"
 );
 }
 }

 // Column means of the centered matrix are (numerically) zero.
 #[test]
 fn test_fdata_to_pc_1d_centered_has_zero_mean() {
 let n = 20;
 let m = 50;
 let (data, _) = generate_test_fdata(n, m);

 let fpca = fdata_to_pc_1d(&data, 3).unwrap();

 for j in 0..m {
 let col_mean: f64 = (0..n).map(|i| fpca.centered[(i, j)]).sum::<f64>() / n as f64;
 assert!(
 col_mean.abs() < 1e-10,
 "Centered data should have zero column mean"
 );
 }
 }

 // Requested ncomp larger than min(n, m) is clamped.
 #[test]
 fn test_fdata_to_pc_1d_ncomp_limits() {
 let n = 10;
 let m = 50;
 let (data, _) = generate_test_fdata(n, m);

 let fpca = fdata_to_pc_1d(&data, 20).unwrap();
 assert!(fpca.singular_values.len() <= n);
 }

 // Empty matrices and ncomp == 0 are rejected with None.
 #[test]
 fn test_fdata_to_pc_1d_invalid_input() {
 let empty = FdMatrix::zeros(0, 50);
 let result = fdata_to_pc_1d(&empty, 3);
 assert!(result.is_none());

 let (data, _) = generate_test_fdata(10, 50);
 let result = fdata_to_pc_1d(&data, 0);
 assert!(result.is_none());
 }

 // With a full-rank number of components, scores * rotation^T reproduces
 // the centered data.
 #[test]
 fn test_fdata_to_pc_1d_reconstruction() {
 let n = 10;
 let m = 30;
 let (data, _) = generate_test_fdata(n, m);

 let ncomp = n.min(m);
 let fpca = fdata_to_pc_1d(&data, ncomp).unwrap();

 for i in 0..n {
 for j in 0..m {
 let mut reconstructed = 0.0;
 for k in 0..ncomp {
 let score = fpca.scores[(i, k)];
 let loading = fpca.rotation[(j, k)];
 reconstructed += score * loading;
 }
 let original_centered = fpca.centered[(i, j)];
 assert!(
 (reconstructed - original_centered).abs() < 0.1,
 "Reconstruction error at ({}, {}): {} vs {}",
 i,
 j,
 reconstructed,
 original_centered
 );
 }
 }
 }

 // PLS returns matrices of the documented shapes.
 #[test]
 fn test_fdata_to_pls_1d_basic() {
 let n = 20;
 let m = 30;
 let ncomp = 3;
 let (x, _) = generate_test_fdata(n, m);

 let y: Vec<f64> = (0..n).map(|i| (i as f64 / n as f64) + 0.1).collect();

 let result = fdata_to_pls_1d(&x, &y, ncomp);
 assert!(result.is_some());

 let pls = result.unwrap();
 assert_eq!(pls.weights.shape(), (m, ncomp));
 assert_eq!(pls.scores.shape(), (n, ncomp));
 assert_eq!(pls.loadings.shape(), (m, ncomp));
 }

 // Each extracted weight vector should be (approximately) unit norm.
 #[test]
 fn test_fdata_to_pls_1d_weights_normalized() {
 let n = 20;
 let m = 30;
 let ncomp = 2;
 let (x, _) = generate_test_fdata(n, m);
 let y: Vec<f64> = (0..n).map(|i| i as f64).collect();

 let pls = fdata_to_pls_1d(&x, &y, ncomp).unwrap();

 for k in 0..ncomp {
 let norm: f64 = (0..m)
 .map(|j| pls.weights[(j, k)].powi(2))
 .sum::<f64>()
 .sqrt();
 assert!(
 (norm - 1.0).abs() < 0.1,
 "Weight vector {} should be unit norm, got {}",
 k,
 norm
 );
 }
 }

 // Mismatched y length and ncomp == 0 are rejected with None.
 #[test]
 fn test_fdata_to_pls_1d_invalid_input() {
 let (x, _) = generate_test_fdata(10, 30);

 let result = fdata_to_pls_1d(&x, &[0.0; 5], 2);
 assert!(result.is_none());

 let y = vec![0.0; 10];
 let result = fdata_to_pls_1d(&x, &y, 0);
 assert!(result.is_none());
 }

 // Ridge fit on well-formed data succeeds and returns full-length outputs.
 #[cfg(feature = "linalg")]
 #[test]
 fn test_ridge_regression_fit_basic() {
 let n = 50;
 let m = 5;

 let mut x = FdMatrix::zeros(n, m);
 for i in 0..n {
 for j in 0..m {
 x[(i, j)] = (i as f64 + j as f64) / (n + m) as f64;
 }
 }

 let y: Vec<f64> = (0..n)
 .map(|i| {
 let mut sum = 0.0;
 for j in 0..m {
 sum += x[(i, j)];
 }
 sum + 0.01 * (i as f64 % 10.0)
 })
 .collect();

 let result = ridge_regression_fit(&x, &y, 0.1, true);

 assert!(result.error.is_none(), "Ridge should fit without error");
 assert_eq!(result.coefficients.len(), m);
 assert_eq!(result.fitted_values.len(), n);
 assert_eq!(result.residuals.len(), n);
 }

 // R^2 is high on near-linear data and never exceeds 1.
 #[cfg(feature = "linalg")]
 #[test]
 fn test_ridge_regression_fit_r_squared() {
 let n = 50;
 let m = 3;

 let x = FdMatrix::from_column_major(
 (0..n * m).map(|i| i as f64 / (n * m) as f64).collect(),
 n,
 m,
 )
 .unwrap();
 let y: Vec<f64> = (0..n).map(|i| i as f64 / n as f64).collect();

 let result = ridge_regression_fit(&x, &y, 0.01, true);

 assert!(
 result.r_squared > 0.5,
 "R-squared should be high, got {}",
 result.r_squared
 );
 assert!(result.r_squared <= 1.0 + 1e-10, "R-squared should be <= 1");
 }

 // A larger penalty should not increase the coefficient norm.
 #[cfg(feature = "linalg")]
 #[test]
 fn test_ridge_regression_fit_regularization() {
 let n = 30;
 let m = 10;

 let x = FdMatrix::from_column_major(
 (0..n * m)
 .map(|i| ((i * 17) % 100) as f64 / 100.0)
 .collect(),
 n,
 m,
 )
 .unwrap();
 let y: Vec<f64> = (0..n).map(|i| (i as f64).sin()).collect();

 let low_lambda = ridge_regression_fit(&x, &y, 0.001, true);
 let high_lambda = ridge_regression_fit(&x, &y, 100.0, true);

 let norm_low: f64 = low_lambda
 .coefficients
 .iter()
 .map(|c| c.powi(2))
 .sum::<f64>()
 .sqrt();
 let norm_high: f64 = high_lambda
 .coefficients
 .iter()
 .map(|c| c.powi(2))
 .sum::<f64>()
 .sqrt();

 assert!(
 norm_high <= norm_low + 1e-6,
 "Higher lambda should shrink coefficients: {} vs {}",
 norm_high,
 norm_low
 );
 }

 // residuals must equal y - fitted_values element-wise.
 #[cfg(feature = "linalg")]
 #[test]
 fn test_ridge_regression_fit_residuals() {
 let n = 20;
 let m = 3;

 let x = FdMatrix::from_column_major(
 (0..n * m).map(|i| i as f64 / (n * m) as f64).collect(),
 n,
 m,
 )
 .unwrap();
 let y: Vec<f64> = (0..n).map(|i| i as f64 / n as f64).collect();

 let result = ridge_regression_fit(&x, &y, 0.1, true);

 for i in 0..n {
 let expected_resid = y[i] - result.fitted_values[i];
 assert!(
 (result.residuals[i] - expected_resid).abs() < 1e-10,
 "Residual mismatch at {}",
 i
 );
 }
 }

 // Fitting without an intercept reports an intercept of zero.
 #[cfg(feature = "linalg")]
 #[test]
 fn test_ridge_regression_fit_no_intercept() {
 let n = 30;
 let m = 5;

 let x = FdMatrix::from_column_major(
 (0..n * m).map(|i| i as f64 / (n * m) as f64).collect(),
 n,
 m,
 )
 .unwrap();
 let y: Vec<f64> = (0..n).map(|i| i as f64 / n as f64).collect();

 let result = ridge_regression_fit(&x, &y, 0.1, false);

 assert!(result.error.is_none());
 assert!(
 result.intercept.abs() < 1e-10,
 "Intercept should be 0, got {}",
 result.intercept
 );
 }

 // Invalid dimensions are reported via the error field, not a panic.
 #[cfg(feature = "linalg")]
 #[test]
 fn test_ridge_regression_fit_invalid_input() {
 let empty = FdMatrix::zeros(0, 5);
 let result = ridge_regression_fit(&empty, &[], 0.1, true);
 assert!(result.error.is_some());

 let x = FdMatrix::zeros(10, 10);
 let y = vec![0.0; 5];
 let result = ridge_regression_fit(&x, &y, 0.1, true);
 assert!(result.error.is_some());
 }

 // All-zero data: if FPCA succeeds at all, every singular value is zero.
 #[test]
 fn test_all_zero_fpca() {
 let n = 5;
 let m = 20;
 let data = FdMatrix::zeros(n, m);
 let result = fdata_to_pc_1d(&data, 2);
 if let Some(res) = result {
 assert_eq!(res.scores.nrows(), n);
 for &sv in &res.singular_values {
 assert!(
 sv.abs() < 1e-10,
 "All-zero data should have zero singular values"
 );
 }
 }
 }

 // Single-observation edge case: must not panic; result value unchecked.
 #[test]
 fn test_n1_pca() {
 let data = FdMatrix::from_column_major(vec![1.0, 2.0, 3.0], 1, 3).unwrap();
 let result = fdata_to_pc_1d(&data, 1);
 let _ = result;
 }

 // Constant y makes every weight direction degenerate; must not panic.
 #[test]
 fn test_constant_y_pls() {
 let n = 10;
 let m = 20;
 let data_vec: Vec<f64> = (0..n * m).map(|i| (i as f64 * 0.1).sin()).collect();
 let data = FdMatrix::from_column_major(data_vec, n, m).unwrap();
 let y = vec![5.0; n];
 let result = fdata_to_pls_1d(&data, &y, 2);
 let _ = result;
 }
}