1use crate::iter_maybe_parallel;
6use crate::matrix::FdMatrix;
7#[cfg(feature = "linalg")]
8use anofox_regression::solvers::RidgeRegressor;
9#[cfg(feature = "linalg")]
10use anofox_regression::{FittedRegressor, Regressor};
11use nalgebra::SVD;
12#[cfg(feature = "parallel")]
13use rayon::iter::ParallelIterator;
14
/// Result of a functional principal component analysis (FPCA).
pub struct FpcaResult {
    /// Singular values of the centered data matrix, one per retained component.
    pub singular_values: Vec<f64>,
    /// Principal component loadings (columns of V), shape (m, ncomp).
    pub rotation: FdMatrix,
    /// Component scores (U scaled by the singular values), shape (n, ncomp).
    pub scores: FdMatrix,
    /// Column means subtracted during centering, length m.
    pub mean: Vec<f64>,
    /// The column-centered data matrix, shape (n, m).
    pub centered: FdMatrix,
}
28
29fn center_columns(data: &FdMatrix) -> (FdMatrix, Vec<f64>) {
31 let (n, m) = data.shape();
32 let means: Vec<f64> = iter_maybe_parallel!(0..m)
33 .map(|j| {
34 let col = data.column(j);
35 let sum: f64 = col.iter().sum();
36 sum / n as f64
37 })
38 .collect();
39
40 let mut centered = FdMatrix::zeros(n, m);
41 for j in 0..m {
42 for i in 0..n {
43 centered[(i, j)] = data[(i, j)] - means[j];
44 }
45 }
46 (centered, means)
47}
48
49fn extract_pc_components(
51 svd: &SVD<f64, nalgebra::Dyn, nalgebra::Dyn>,
52 n: usize,
53 m: usize,
54 ncomp: usize,
55) -> Option<(Vec<f64>, FdMatrix, FdMatrix)> {
56 let singular_values: Vec<f64> = svd.singular_values.iter().take(ncomp).cloned().collect();
57
58 let v_t = svd.v_t.as_ref()?;
59 let mut rotation = FdMatrix::zeros(m, ncomp);
60 for k in 0..ncomp {
61 for j in 0..m {
62 rotation[(j, k)] = v_t[(k, j)];
63 }
64 }
65
66 let u = svd.u.as_ref()?;
67 let mut scores = FdMatrix::zeros(n, ncomp);
68 for k in 0..ncomp {
69 let sv_k = singular_values[k];
70 for i in 0..n {
71 scores[(i, k)] = u[(i, k)] * sv_k;
72 }
73 }
74
75 Some((singular_values, rotation, scores))
76}
77
78pub fn fdata_to_pc_1d(data: &FdMatrix, ncomp: usize) -> Option<FpcaResult> {
84 let (n, m) = data.shape();
85 if n == 0 || m == 0 || ncomp < 1 {
86 return None;
87 }
88
89 let ncomp = ncomp.min(n).min(m);
90 let (centered, means) = center_columns(data);
91 let svd = SVD::new(centered.to_dmatrix(), true, true);
92 let (singular_values, rotation, scores) = extract_pc_components(&svd, n, m, ncomp)?;
93
94 Some(FpcaResult {
95 singular_values,
96 rotation,
97 scores,
98 mean: means,
99 centered,
100 })
101}
102
/// Result of a partial least squares (PLS) decomposition.
pub struct PlsResult {
    /// Weight vectors, shape (m, ncomp); each column is normalized to unit
    /// length unless its norm was numerically zero.
    pub weights: FdMatrix,
    /// Score vectors t = X w, shape (n, ncomp).
    pub scores: FdMatrix,
    /// X-loadings p = X't / (t't), shape (m, ncomp).
    pub loadings: FdMatrix,
}
112
113fn pls_compute_weights(x_cen: &FdMatrix, y_cen: &[f64]) -> Vec<f64> {
115 let (n, m) = x_cen.shape();
116 let mut w: Vec<f64> = (0..m)
117 .map(|j| {
118 let mut sum = 0.0;
119 for i in 0..n {
120 sum += x_cen[(i, j)] * y_cen[i];
121 }
122 sum
123 })
124 .collect();
125
126 let w_norm: f64 = w.iter().map(|&wi| wi * wi).sum::<f64>().sqrt();
127 if w_norm > 1e-10 {
128 for wi in &mut w {
129 *wi /= w_norm;
130 }
131 }
132 w
133}
134
135fn pls_compute_scores(x_cen: &FdMatrix, w: &[f64]) -> Vec<f64> {
137 let (n, m) = x_cen.shape();
138 (0..n)
139 .map(|i| {
140 let mut sum = 0.0;
141 for j in 0..m {
142 sum += x_cen[(i, j)] * w[j];
143 }
144 sum
145 })
146 .collect()
147}
148
149fn pls_compute_loadings(x_cen: &FdMatrix, t: &[f64], t_norm_sq: f64) -> Vec<f64> {
151 let (n, m) = x_cen.shape();
152 (0..m)
153 .map(|j| {
154 let mut sum = 0.0;
155 for i in 0..n {
156 sum += x_cen[(i, j)] * t[i];
157 }
158 sum / t_norm_sq.max(1e-10)
159 })
160 .collect()
161}
162
163fn pls_deflate_x(x_cen: &mut FdMatrix, t: &[f64], p: &[f64]) {
165 let (n, m) = x_cen.shape();
166 for j in 0..m {
167 for i in 0..n {
168 x_cen[(i, j)] -= t[i] * p[j];
169 }
170 }
171}
172
/// One NIPALS iteration: compute the k-th weight, score and loading
/// vectors, store them in column `k` of the output matrices, then
/// deflate both X and y in place.
fn pls_nipals_step(
    k: usize,
    x_cen: &mut FdMatrix,
    y_cen: &mut [f64],
    weights: &mut FdMatrix,
    scores: &mut FdMatrix,
    loadings: &mut FdMatrix,
) {
    let n = x_cen.nrows();
    let m = x_cen.ncols();

    // w = X'y (unit norm), t = X w, p = X't / (t't).
    let w = pls_compute_weights(x_cen, y_cen);
    let t = pls_compute_scores(x_cen, &w);
    let t_norm_sq: f64 = t.iter().map(|&ti| ti * ti).sum();
    let p = pls_compute_loadings(x_cen, &t, t_norm_sq);

    // Record the k-th component in the output matrices.
    for j in 0..m {
        weights[(j, k)] = w[j];
        loadings[(j, k)] = p[j];
    }
    for i in 0..n {
        scores[(i, k)] = t[i];
    }

    // Deflate X by the extracted component, then regress y on t
    // (q = t'y / t't, denominator floored at 1e-10) and remove the
    // explained part from y.
    pls_deflate_x(x_cen, &t, &p);
    let t_y: f64 = t.iter().zip(y_cen.iter()).map(|(&ti, &yi)| ti * yi).sum();
    let q = t_y / t_norm_sq.max(1e-10);
    for i in 0..n {
        y_cen[i] -= t[i] * q;
    }
}
205
206pub fn fdata_to_pls_1d(data: &FdMatrix, y: &[f64], ncomp: usize) -> Option<PlsResult> {
213 let (n, m) = data.shape();
214 if n == 0 || m == 0 || y.len() != n || ncomp < 1 {
215 return None;
216 }
217
218 let ncomp = ncomp.min(n).min(m);
219
220 let x_means: Vec<f64> = (0..m)
222 .map(|j| {
223 let col = data.column(j);
224 let sum: f64 = col.iter().sum();
225 sum / n as f64
226 })
227 .collect();
228
229 let y_mean: f64 = y.iter().sum::<f64>() / n as f64;
230
231 let mut x_cen = FdMatrix::zeros(n, m);
232 for j in 0..m {
233 for i in 0..n {
234 x_cen[(i, j)] = data[(i, j)] - x_means[j];
235 }
236 }
237
238 let mut y_cen: Vec<f64> = y.iter().map(|&yi| yi - y_mean).collect();
239
240 let mut weights = FdMatrix::zeros(m, ncomp);
241 let mut scores = FdMatrix::zeros(n, ncomp);
242 let mut loadings = FdMatrix::zeros(m, ncomp);
243
244 for k in 0..ncomp {
246 pls_nipals_step(
247 k,
248 &mut x_cen,
249 &mut y_cen,
250 &mut weights,
251 &mut scores,
252 &mut loadings,
253 );
254 }
255
256 Some(PlsResult {
257 weights,
258 scores,
259 loadings,
260 })
261}
262
/// Result of a ridge regression fit (available with the `linalg` feature).
#[cfg(feature = "linalg")]
pub struct RidgeResult {
    /// Fitted coefficients, one per predictor column (empty on error).
    pub coefficients: Vec<f64>,
    /// Fitted intercept; 0.0 when fit without an intercept or on error.
    pub intercept: f64,
    /// In-sample predictions, length n (empty on error).
    pub fitted_values: Vec<f64>,
    /// y - fitted_values, length n (empty on error).
    pub residuals: Vec<f64>,
    /// Coefficient of determination; 0.0 when the total sum of squares is
    /// zero or on error.
    pub r_squared: f64,
    /// The regularization strength that was requested for the fit.
    pub lambda: f64,
    /// Set when the fit failed; the other fields then hold defaults.
    pub error: Option<String>,
}
281
/// Fit a ridge regression of `y` on the columns of `x` with penalty
/// `lambda`, optionally including an intercept.
///
/// Never panics on bad input: dimension mismatches and solver failures are
/// reported through `RidgeResult::error` with all outputs left empty/zero.
#[cfg(feature = "linalg")]
pub fn ridge_regression_fit(
    x: &FdMatrix,
    y: &[f64],
    lambda: f64,
    with_intercept: bool,
) -> RidgeResult {
    let (n, m) = x.shape();

    // Single error constructor instead of duplicating the struct literal
    // for every failure path.
    let fail = |msg: String| RidgeResult {
        coefficients: Vec::new(),
        intercept: 0.0,
        fitted_values: Vec::new(),
        residuals: Vec::new(),
        r_squared: 0.0,
        lambda,
        error: Some(msg),
    };

    if n == 0 || m == 0 || y.len() != n {
        return fail("Invalid input dimensions".to_string());
    }

    // Copy the inputs into the faer types the solver expects.
    let x_faer = faer::Mat::from_fn(n, m, |i, j| x[(i, j)]);
    let y_faer = faer::Col::from_fn(n, |i| y[i]);

    let regressor = RidgeRegressor::builder()
        .with_intercept(with_intercept)
        .lambda(lambda)
        .build();

    let fitted = match regressor.fit(&x_faer, &y_faer) {
        Ok(f) => f,
        Err(e) => return fail(format!("Fit failed: {:?}", e)),
    };

    let coefs = fitted.coefficients();
    let coefficients: Vec<f64> = (0..coefs.nrows()).map(|i| coefs[i]).collect();

    // Intercept defaults to 0.0 when fit without one.
    let intercept = fitted.intercept().unwrap_or(0.0);

    // In-sample predictions: intercept + X * beta.
    let mut fitted_values = vec![0.0; n];
    for i in 0..n {
        let mut pred = intercept;
        for j in 0..m {
            pred += x[(i, j)] * coefficients[j];
        }
        fitted_values[i] = pred;
    }

    let residuals: Vec<f64> = y
        .iter()
        .zip(fitted_values.iter())
        .map(|(&yi, &yhat)| yi - yhat)
        .collect();

    // R^2 = 1 - SS_res / SS_tot, defined as 0 for a constant response.
    let y_mean: f64 = y.iter().sum::<f64>() / n as f64;
    let ss_tot: f64 = y.iter().map(|&yi| (yi - y_mean).powi(2)).sum();
    let ss_res: f64 = residuals.iter().map(|&r| r.powi(2)).sum();
    let r_squared = if ss_tot > 0.0 {
        1.0 - ss_res / ss_tot
    } else {
        0.0
    };

    RidgeResult {
        coefficients,
        intercept,
        fitted_values,
        residuals,
        r_squared,
        lambda,
        error: None,
    }
}
378
#[cfg(test)]
mod tests {
    use super::*;
    use std::f64::consts::PI;

    /// Build an (n x m) matrix of phase-shifted sine curves sampled on a
    /// uniform grid over [0, 1], returned together with the grid points.
    // NOTE(review): assumes m >= 2 — the grid divides by (m - 1).
    fn generate_test_fdata(n: usize, m: usize) -> (FdMatrix, Vec<f64>) {
        let t: Vec<f64> = (0..m).map(|j| j as f64 / (m - 1) as f64).collect();

        let mut data = FdMatrix::zeros(n, m);
        for i in 0..n {
            let phase = (i as f64 / n as f64) * PI;
            for j in 0..m {
                data[(i, j)] = (2.0 * PI * t[j] + phase).sin();
            }
        }

        (data, t)
    }

    // ---- FPCA ----

    /// Output shapes match (n, m, ncomp) for a well-formed input.
    #[test]
    fn test_fdata_to_pc_1d_basic() {
        let n = 20;
        let m = 50;
        let ncomp = 3;
        let (data, _) = generate_test_fdata(n, m);

        let result = fdata_to_pc_1d(&data, ncomp);
        assert!(result.is_some());

        let fpca = result.unwrap();
        assert_eq!(fpca.singular_values.len(), ncomp);
        assert_eq!(fpca.rotation.shape(), (m, ncomp));
        assert_eq!(fpca.scores.shape(), (n, ncomp));
        assert_eq!(fpca.mean.len(), m);
        assert_eq!(fpca.centered.shape(), (n, m));
    }

    /// Singular values come out in non-increasing order (SVD convention).
    #[test]
    fn test_fdata_to_pc_1d_singular_values_decreasing() {
        let n = 20;
        let m = 50;
        let ncomp = 5;
        let (data, _) = generate_test_fdata(n, m);

        let fpca = fdata_to_pc_1d(&data, ncomp).unwrap();

        for i in 1..fpca.singular_values.len() {
            assert!(
                fpca.singular_values[i] <= fpca.singular_values[i - 1] + 1e-10,
                "Singular values should be decreasing"
            );
        }
    }

    /// Every column of the centered matrix averages to ~0.
    #[test]
    fn test_fdata_to_pc_1d_centered_has_zero_mean() {
        let n = 20;
        let m = 50;
        let (data, _) = generate_test_fdata(n, m);

        let fpca = fdata_to_pc_1d(&data, 3).unwrap();

        for j in 0..m {
            let col_mean: f64 = (0..n).map(|i| fpca.centered[(i, j)]).sum::<f64>() / n as f64;
            assert!(
                col_mean.abs() < 1e-10,
                "Centered data should have zero column mean"
            );
        }
    }

    /// Requesting more components than min(n, m) is clamped, not an error.
    #[test]
    fn test_fdata_to_pc_1d_ncomp_limits() {
        let n = 10;
        let m = 50;
        let (data, _) = generate_test_fdata(n, m);

        let fpca = fdata_to_pc_1d(&data, 20).unwrap();
        assert!(fpca.singular_values.len() <= n);
    }

    /// Empty data or ncomp == 0 yields None.
    #[test]
    fn test_fdata_to_pc_1d_invalid_input() {
        let empty = FdMatrix::zeros(0, 50);
        let result = fdata_to_pc_1d(&empty, 3);
        assert!(result.is_none());

        let (data, _) = generate_test_fdata(10, 50);
        let result = fdata_to_pc_1d(&data, 0);
        assert!(result.is_none());
    }

    /// With a full set of components, scores * rotation' reconstructs the
    /// centered data (up to a loose tolerance).
    #[test]
    fn test_fdata_to_pc_1d_reconstruction() {
        let n = 10;
        let m = 30;
        let (data, _) = generate_test_fdata(n, m);

        let ncomp = n.min(m);
        let fpca = fdata_to_pc_1d(&data, ncomp).unwrap();

        for i in 0..n {
            for j in 0..m {
                let mut reconstructed = 0.0;
                for k in 0..ncomp {
                    let score = fpca.scores[(i, k)];
                    let loading = fpca.rotation[(j, k)];
                    reconstructed += score * loading;
                }
                let original_centered = fpca.centered[(i, j)];
                assert!(
                    (reconstructed - original_centered).abs() < 0.1,
                    "Reconstruction error at ({}, {}): {} vs {}",
                    i,
                    j,
                    reconstructed,
                    original_centered
                );
            }
        }
    }

    // ---- PLS ----

    /// Output shapes match (n, m, ncomp) for a well-formed input.
    #[test]
    fn test_fdata_to_pls_1d_basic() {
        let n = 20;
        let m = 30;
        let ncomp = 3;
        let (x, _) = generate_test_fdata(n, m);

        let y: Vec<f64> = (0..n).map(|i| (i as f64 / n as f64) + 0.1).collect();

        let result = fdata_to_pls_1d(&x, &y, ncomp);
        assert!(result.is_some());

        let pls = result.unwrap();
        assert_eq!(pls.weights.shape(), (m, ncomp));
        assert_eq!(pls.scores.shape(), (n, ncomp));
        assert_eq!(pls.loadings.shape(), (m, ncomp));
    }

    /// Each weight column should be (approximately) unit norm.
    #[test]
    fn test_fdata_to_pls_1d_weights_normalized() {
        let n = 20;
        let m = 30;
        let ncomp = 2;
        let (x, _) = generate_test_fdata(n, m);
        let y: Vec<f64> = (0..n).map(|i| i as f64).collect();

        let pls = fdata_to_pls_1d(&x, &y, ncomp).unwrap();

        for k in 0..ncomp {
            let norm: f64 = (0..m)
                .map(|j| pls.weights[(j, k)].powi(2))
                .sum::<f64>()
                .sqrt();
            assert!(
                (norm - 1.0).abs() < 0.1,
                "Weight vector {} should be unit norm, got {}",
                k,
                norm
            );
        }
    }

    /// Mismatched y length or ncomp == 0 yields None.
    #[test]
    fn test_fdata_to_pls_1d_invalid_input() {
        let (x, _) = generate_test_fdata(10, 30);

        let result = fdata_to_pls_1d(&x, &[0.0; 5], 2);
        assert!(result.is_none());

        let y = vec![0.0; 10];
        let result = fdata_to_pls_1d(&x, &y, 0);
        assert!(result.is_none());
    }

    // ---- Ridge regression (linalg feature only) ----

    /// Fit succeeds and outputs have the expected lengths.
    #[cfg(feature = "linalg")]
    #[test]
    fn test_ridge_regression_fit_basic() {
        let n = 50;
        let m = 5;

        let mut x = FdMatrix::zeros(n, m);
        for i in 0..n {
            for j in 0..m {
                x[(i, j)] = (i as f64 + j as f64) / (n + m) as f64;
            }
        }

        // y is a noisy row-sum of x, so the fit should be easy.
        let y: Vec<f64> = (0..n)
            .map(|i| {
                let mut sum = 0.0;
                for j in 0..m {
                    sum += x[(i, j)];
                }
                sum + 0.01 * (i as f64 % 10.0)
            })
            .collect();

        let result = ridge_regression_fit(&x, &y, 0.1, true);

        assert!(result.error.is_none(), "Ridge should fit without error");
        assert_eq!(result.coefficients.len(), m);
        assert_eq!(result.fitted_values.len(), n);
        assert_eq!(result.residuals.len(), n);
    }

    /// R-squared is high on a near-linear problem and never exceeds 1.
    #[cfg(feature = "linalg")]
    #[test]
    fn test_ridge_regression_fit_r_squared() {
        let n = 50;
        let m = 3;

        let x = FdMatrix::from_column_major(
            (0..n * m).map(|i| i as f64 / (n * m) as f64).collect(),
            n,
            m,
        )
        .unwrap();
        let y: Vec<f64> = (0..n).map(|i| i as f64 / n as f64).collect();

        let result = ridge_regression_fit(&x, &y, 0.01, true);

        assert!(
            result.r_squared > 0.5,
            "R-squared should be high, got {}",
            result.r_squared
        );
        assert!(result.r_squared <= 1.0 + 1e-10, "R-squared should be <= 1");
    }

    /// Larger lambda should not grow the coefficient norm (shrinkage).
    #[cfg(feature = "linalg")]
    #[test]
    fn test_ridge_regression_fit_regularization() {
        let n = 30;
        let m = 10;

        let x = FdMatrix::from_column_major(
            (0..n * m)
                .map(|i| ((i * 17) % 100) as f64 / 100.0)
                .collect(),
            n,
            m,
        )
        .unwrap();
        let y: Vec<f64> = (0..n).map(|i| (i as f64).sin()).collect();

        let low_lambda = ridge_regression_fit(&x, &y, 0.001, true);
        let high_lambda = ridge_regression_fit(&x, &y, 100.0, true);

        let norm_low: f64 = low_lambda
            .coefficients
            .iter()
            .map(|c| c.powi(2))
            .sum::<f64>()
            .sqrt();
        let norm_high: f64 = high_lambda
            .coefficients
            .iter()
            .map(|c| c.powi(2))
            .sum::<f64>()
            .sqrt();

        assert!(
            norm_high <= norm_low + 1e-6,
            "Higher lambda should shrink coefficients: {} vs {}",
            norm_high,
            norm_low
        );
    }

    /// residuals[i] must equal y[i] - fitted_values[i] exactly (up to fp).
    #[cfg(feature = "linalg")]
    #[test]
    fn test_ridge_regression_fit_residuals() {
        let n = 20;
        let m = 3;

        let x = FdMatrix::from_column_major(
            (0..n * m).map(|i| i as f64 / (n * m) as f64).collect(),
            n,
            m,
        )
        .unwrap();
        let y: Vec<f64> = (0..n).map(|i| i as f64 / n as f64).collect();

        let result = ridge_regression_fit(&x, &y, 0.1, true);

        for i in 0..n {
            let expected_resid = y[i] - result.fitted_values[i];
            assert!(
                (result.residuals[i] - expected_resid).abs() < 1e-10,
                "Residual mismatch at {}",
                i
            );
        }
    }

    /// With with_intercept == false, the reported intercept is 0.
    #[cfg(feature = "linalg")]
    #[test]
    fn test_ridge_regression_fit_no_intercept() {
        let n = 30;
        let m = 5;

        let x = FdMatrix::from_column_major(
            (0..n * m).map(|i| i as f64 / (n * m) as f64).collect(),
            n,
            m,
        )
        .unwrap();
        let y: Vec<f64> = (0..n).map(|i| i as f64 / n as f64).collect();

        let result = ridge_regression_fit(&x, &y, 0.1, false);

        assert!(result.error.is_none());
        assert!(
            result.intercept.abs() < 1e-10,
            "Intercept should be 0, got {}",
            result.intercept
        );
    }

    /// Empty matrix or mismatched y length reports an error, never panics.
    #[cfg(feature = "linalg")]
    #[test]
    fn test_ridge_regression_fit_invalid_input() {
        let empty = FdMatrix::zeros(0, 5);
        let result = ridge_regression_fit(&empty, &[], 0.1, true);
        assert!(result.error.is_some());

        let x = FdMatrix::zeros(10, 10);
        let y = vec![0.0; 5];
        let result = ridge_regression_fit(&x, &y, 0.1, true);
        assert!(result.error.is_some());
    }
}
733}