1use crate::iter_maybe_parallel;
6#[cfg(feature = "linalg")]
7use anofox_regression::solvers::RidgeRegressor;
8#[cfg(feature = "linalg")]
9use anofox_regression::{FittedRegressor, Regressor};
10use nalgebra::{DMatrix, SVD};
11#[cfg(feature = "parallel")]
12use rayon::iter::ParallelIterator;
13
/// Output of [`fdata_to_pc_1d`]: a truncated-SVD functional PCA.
///
/// All matrices are stored as flat `Vec<f64>` in column-major layout.
pub struct FpcaResult {
    /// Leading singular values (length `ncomp`), in non-increasing order.
    pub singular_values: Vec<f64>,
    /// Component loadings; `rotation[j + k * m]` is the weight of evaluation
    /// point `j` in component `k` (length `m * ncomp`).
    pub rotation: Vec<f64>,
    /// Component scores; `scores[i + k * n]` is observation `i`'s score on
    /// component `k`, i.e. `U[i, k] * sigma_k` (length `n * ncomp`).
    pub scores: Vec<f64>,
    /// Column (evaluation-point) means used for centering (length `m`).
    pub mean: Vec<f64>,
    /// The column-centered data matrix, column-major (length `n * m`).
    pub centered: Vec<f64>,
}
27
28pub fn fdata_to_pc_1d(data: &[f64], n: usize, m: usize, ncomp: usize) -> Option<FpcaResult> {
36 if n == 0 || m == 0 || ncomp < 1 || data.len() != n * m {
37 return None;
38 }
39
40 let ncomp = ncomp.min(n).min(m);
41
42 let means: Vec<f64> = iter_maybe_parallel!(0..m)
44 .map(|j| {
45 let mut sum = 0.0;
46 for i in 0..n {
47 sum += data[i + j * n];
48 }
49 sum / n as f64
50 })
51 .collect();
52
53 let centered_data: Vec<f64> = (0..(n * m))
55 .map(|idx| {
56 let i = idx % n;
57 let j = idx / n;
58 data[i + j * n] - means[j]
59 })
60 .collect();
61
62 let matrix = DMatrix::from_column_slice(n, m, ¢ered_data);
64
65 let svd = SVD::new(matrix, true, true);
67
68 let singular_values: Vec<f64> = svd.singular_values.iter().take(ncomp).cloned().collect();
70
71 let v_t = svd.v_t.as_ref()?;
73 let rotation_data: Vec<f64> = (0..ncomp)
74 .flat_map(|k| (0..m).map(move |j| v_t[(k, j)]))
75 .collect();
76
77 let u = svd.u.as_ref()?;
79 let mut scores_data: Vec<f64> = Vec::with_capacity(n * ncomp);
80 for k in 0..ncomp {
81 let sv_k = singular_values[k];
82 for i in 0..n {
83 scores_data.push(u[(i, k)] * sv_k);
84 }
85 }
86
87 Some(FpcaResult {
88 singular_values,
89 rotation: rotation_data,
90 scores: scores_data,
91 mean: means,
92 centered: centered_data,
93 })
94}
95
/// Output of [`fdata_to_pls_1d`]: NIPALS partial-least-squares components.
///
/// All buffers are flat and packed component-by-component.
pub struct PlsResult {
    /// Weight vectors; `weights[j + k * m]` (length `m * ncomp`).
    pub weights: Vec<f64>,
    /// Score vectors; `scores[i + k * n]` (length `n * ncomp`).
    pub scores: Vec<f64>,
    /// Loading vectors; `loadings[j + k * m]` (length `m * ncomp`).
    pub loadings: Vec<f64>,
}
105
/// Computes the PLS weight vector `w = Xᵀy` on centered data and scales it
/// to unit norm.
///
/// `x_cen` is the centered `n × m` design matrix (column-major) and `y_cen`
/// the centered response of length `n`. When the raw vector's norm is
/// numerically zero (≤ 1e-10) it is returned un-normalized.
fn pls_compute_weights(x_cen: &[f64], y_cen: &[f64], n: usize, m: usize) -> Vec<f64> {
    // Raw covariances: inner product of each X column with y.
    let mut weights = vec![0.0; m];
    for j in 0..m {
        let col = &x_cen[j * n..(j + 1) * n];
        weights[j] = col.iter().zip(y_cen.iter()).map(|(&a, &b)| a * b).sum();
    }

    // Rescale to unit length, guarding against a degenerate (zero) vector.
    let norm = weights.iter().map(|w| w * w).sum::<f64>().sqrt();
    if norm > 1e-10 {
        weights.iter_mut().for_each(|w| *w /= norm);
    }
    weights
}
126
/// Projects each centered observation onto the weight vector:
/// `t_i = Σ_j X[i, j] * w_j`.
fn pls_compute_scores(x_cen: &[f64], w: &[f64], n: usize, m: usize) -> Vec<f64> {
    let mut scores = vec![0.0; n];
    // Accumulate column-by-column; each score sees terms in ascending j,
    // matching a row-wise dot product.
    for j in 0..m {
        let wj = w[j];
        let col = &x_cen[j * n..(j + 1) * n];
        for (ti, &xij) in scores.iter_mut().zip(col.iter()) {
            *ti += xij * wj;
        }
    }
    scores
}
139
/// Regresses each centered X column on the score vector:
/// `p_j = (X[:, j] · t) / ‖t‖²`, with the denominator floored at 1e-10 to
/// avoid division by zero.
fn pls_compute_loadings(x_cen: &[f64], t: &[f64], n: usize, m: usize, t_norm_sq: f64) -> Vec<f64> {
    let denom = t_norm_sq.max(1e-10);
    let mut loadings = Vec::with_capacity(m);
    for j in 0..m {
        let col = &x_cen[j * n..(j + 1) * n];
        let dot: f64 = col.iter().zip(t.iter()).map(|(&xij, &ti)| xij * ti).sum();
        loadings.push(dot / denom);
    }
    loadings
}
152
/// Subtracts the rank-one component `t pᵀ` from X in place:
/// `X[i, j] -= t_i * p_j`.
fn pls_deflate_x(x_cen: &mut [f64], t: &[f64], p: &[f64], n: usize, m: usize) {
    for j in 0..m {
        let pj = p[j];
        let col = &mut x_cen[j * n..(j + 1) * n];
        for (xij, &ti) in col.iter_mut().zip(t.iter()) {
            *xij -= ti * pj;
        }
    }
}
161
162fn pls_nipals_step(
164 k: usize,
165 x_cen: &mut [f64],
166 y_cen: &mut [f64],
167 n: usize,
168 m: usize,
169 weights: &mut [f64],
170 scores: &mut [f64],
171 loadings: &mut [f64],
172) {
173 let w = pls_compute_weights(x_cen, y_cen, n, m);
174 let t = pls_compute_scores(x_cen, &w, n, m);
175 let t_norm_sq: f64 = t.iter().map(|&ti| ti * ti).sum();
176 let p = pls_compute_loadings(x_cen, &t, n, m, t_norm_sq);
177
178 for j in 0..m {
179 weights[j + k * m] = w[j];
180 loadings[j + k * m] = p[j];
181 }
182 for i in 0..n {
183 scores[i + k * n] = t[i];
184 }
185
186 pls_deflate_x(x_cen, &t, &p, n, m);
187 let t_y: f64 = t.iter().zip(y_cen.iter()).map(|(&ti, &yi)| ti * yi).sum();
188 let q = t_y / t_norm_sq.max(1e-10);
189 for i in 0..n {
190 y_cen[i] -= t[i] * q;
191 }
192}
193
194pub fn fdata_to_pls_1d(
196 data: &[f64],
197 n: usize,
198 m: usize,
199 y: &[f64],
200 ncomp: usize,
201) -> Option<PlsResult> {
202 if n == 0 || m == 0 || y.len() != n || ncomp < 1 || data.len() != n * m {
203 return None;
204 }
205
206 let ncomp = ncomp.min(n).min(m);
207
208 let x_means: Vec<f64> = (0..m)
210 .map(|j| {
211 let mut sum = 0.0;
212 for i in 0..n {
213 sum += data[i + j * n];
214 }
215 sum / n as f64
216 })
217 .collect();
218
219 let y_mean: f64 = y.iter().sum::<f64>() / n as f64;
220
221 let mut x_cen: Vec<f64> = (0..(n * m))
222 .map(|idx| {
223 let i = idx % n;
224 let j = idx / n;
225 data[i + j * n] - x_means[j]
226 })
227 .collect();
228
229 let mut y_cen: Vec<f64> = y.iter().map(|&yi| yi - y_mean).collect();
230
231 let mut weights = vec![0.0; m * ncomp];
232 let mut scores = vec![0.0; n * ncomp];
233 let mut loadings = vec![0.0; m * ncomp];
234
235 for k in 0..ncomp {
237 pls_nipals_step(
238 k,
239 &mut x_cen,
240 &mut y_cen,
241 n,
242 m,
243 &mut weights,
244 &mut scores,
245 &mut loadings,
246 );
247 }
248
249 Some(PlsResult {
250 weights,
251 scores,
252 loadings,
253 })
254}
255
#[cfg(feature = "linalg")]
/// Output of [`ridge_regression_fit`]. On failure only `lambda` and `error`
/// are meaningful; the vectors are empty and scalars are zero.
pub struct RidgeResult {
    /// One coefficient per predictor column (length `m` on success).
    pub coefficients: Vec<f64>,
    /// Fitted intercept, or 0.0 when fitted without an intercept.
    pub intercept: f64,
    /// In-sample predictions, one per observation (length `n`).
    pub fitted_values: Vec<f64>,
    /// `y[i] - fitted_values[i]` for each observation.
    pub residuals: Vec<f64>,
    /// Coefficient of determination; 0.0 when `y` has zero variance.
    pub r_squared: f64,
    /// The regularization strength that was requested.
    pub lambda: f64,
    /// Failure description, or `None` on success.
    pub error: Option<String>,
}
274
#[cfg(feature = "linalg")]
/// Builds the `RidgeResult` for a failed fit: empty outputs, zero scalars,
/// the requested `lambda` echoed back, and the error message set.
fn ridge_failure(lambda: f64, message: String) -> RidgeResult {
    RidgeResult {
        coefficients: Vec::new(),
        intercept: 0.0,
        fitted_values: Vec::new(),
        residuals: Vec::new(),
        r_squared: 0.0,
        lambda,
        error: Some(message),
    }
}

#[cfg(feature = "linalg")]
/// Fits a ridge (L2-regularized) linear regression of `y` on the `n × m`
/// column-major design matrix `x`.
///
/// Never panics on bad input: dimension errors and solver failures are
/// reported through `RidgeResult::error`. Fitted values and residuals are
/// recomputed from the extracted coefficients, and R² = 1 − SSres/SStot
/// (0.0 when the response has zero variance).
pub fn ridge_regression_fit(
    x: &[f64],
    y: &[f64],
    n: usize,
    m: usize,
    lambda: f64,
    with_intercept: bool,
) -> RidgeResult {
    if n == 0 || m == 0 || y.len() != n || x.len() != n * m {
        return ridge_failure(lambda, "Invalid input dimensions".to_string());
    }

    // Convert the flat column-major buffer into faer types for the solver.
    let x_faer = faer::Mat::from_fn(n, m, |i, j| x[i + j * n]);
    let y_faer = faer::Col::from_fn(n, |i| y[i]);

    let regressor = RidgeRegressor::builder()
        .with_intercept(with_intercept)
        .lambda(lambda)
        .build();

    let fitted = match regressor.fit(&x_faer, &y_faer) {
        Ok(f) => f,
        Err(e) => return ridge_failure(lambda, format!("Fit failed: {:?}", e)),
    };

    let coefs = fitted.coefficients();
    let coefficients: Vec<f64> = (0..coefs.nrows()).map(|i| coefs[i]).collect();

    // `intercept()` is absent when fitted without one; treat that as 0.
    let intercept = fitted.intercept().unwrap_or(0.0);

    // Recompute predictions from the extracted coefficients so the returned
    // values are consistent with `coefficients`/`intercept`.
    let mut fitted_values = vec![0.0; n];
    for i in 0..n {
        let mut pred = intercept;
        for j in 0..m {
            pred += x[i + j * n] * coefficients[j];
        }
        fitted_values[i] = pred;
    }

    let residuals: Vec<f64> = y
        .iter()
        .zip(fitted_values.iter())
        .map(|(&yi, &yhat)| yi - yhat)
        .collect();

    // Coefficient of determination, guarded against a constant response.
    let y_mean: f64 = y.iter().sum::<f64>() / n as f64;
    let ss_tot: f64 = y.iter().map(|&yi| (yi - y_mean).powi(2)).sum();
    let ss_res: f64 = residuals.iter().map(|&r| r.powi(2)).sum();
    let r_squared = if ss_tot > 0.0 {
        1.0 - ss_res / ss_tot
    } else {
        0.0
    };

    RidgeResult {
        coefficients,
        intercept,
        fitted_values,
        residuals,
        r_squared,
        lambda,
        error: None,
    }
}
374
#[cfg(test)]
mod tests {
    use super::*;
    use std::f64::consts::PI;

    /// Builds an n×m column-major matrix of phase-shifted sine curves plus
    /// the evaluation grid on [0, 1]. Requires m >= 2 (divides by m - 1).
    fn generate_test_fdata(n: usize, m: usize) -> (Vec<f64>, Vec<f64>) {
        let t: Vec<f64> = (0..m).map(|j| j as f64 / (m - 1) as f64).collect();

        let mut data = vec![0.0; n * m];
        for i in 0..n {
            let phase = (i as f64 / n as f64) * PI;
            for j in 0..m {
                data[i + j * n] = (2.0 * PI * t[j] + phase).sin();
            }
        }

        (data, t)
    }

    // Output buffers have the documented sizes.
    #[test]
    fn test_fdata_to_pc_1d_basic() {
        let n = 20;
        let m = 50;
        let ncomp = 3;
        let (data, _) = generate_test_fdata(n, m);

        let result = fdata_to_pc_1d(&data, n, m, ncomp);
        assert!(result.is_some());

        let fpca = result.unwrap();
        assert_eq!(fpca.singular_values.len(), ncomp);
        assert_eq!(fpca.rotation.len(), m * ncomp);
        assert_eq!(fpca.scores.len(), n * ncomp);
        assert_eq!(fpca.mean.len(), m);
        assert_eq!(fpca.centered.len(), n * m);
    }

    #[test]
    fn test_fdata_to_pc_1d_singular_values_decreasing() {
        let n = 20;
        let m = 50;
        let ncomp = 5;
        let (data, _) = generate_test_fdata(n, m);

        let fpca = fdata_to_pc_1d(&data, n, m, ncomp).unwrap();

        for i in 1..fpca.singular_values.len() {
            assert!(
                fpca.singular_values[i] <= fpca.singular_values[i - 1] + 1e-10,
                "Singular values should be decreasing"
            );
        }
    }

    #[test]
    fn test_fdata_to_pc_1d_centered_has_zero_mean() {
        let n = 20;
        let m = 50;
        let (data, _) = generate_test_fdata(n, m);

        let fpca = fdata_to_pc_1d(&data, n, m, 3).unwrap();

        for j in 0..m {
            let col_mean: f64 = (0..n).map(|i| fpca.centered[i + j * n]).sum::<f64>() / n as f64;
            assert!(
                col_mean.abs() < 1e-10,
                "Centered data should have zero column mean"
            );
        }
    }

    // Requesting more components than min(n, m) must clamp, not fail.
    #[test]
    fn test_fdata_to_pc_1d_ncomp_limits() {
        let n = 10;
        let m = 50;
        let (data, _) = generate_test_fdata(n, m);

        let fpca = fdata_to_pc_1d(&data, n, m, 20).unwrap();
        assert!(fpca.singular_values.len() <= n);
    }

    #[test]
    fn test_fdata_to_pc_1d_invalid_input() {
        // n == 0
        let result = fdata_to_pc_1d(&[], 0, 50, 3);
        assert!(result.is_none());

        // data length does not match n * m
        let data = vec![0.0; 100];
        let result = fdata_to_pc_1d(&data, 10, 20, 3);
        assert!(result.is_none());

        // ncomp == 0
        let (data, _) = generate_test_fdata(10, 50);
        let result = fdata_to_pc_1d(&data, 10, 50, 0);
        assert!(result.is_none());
    }

    // With a full set of components, scores * rotation should reproduce
    // the centered data to within a loose tolerance.
    #[test]
    fn test_fdata_to_pc_1d_reconstruction() {
        let n = 10;
        let m = 30;
        let (data, _) = generate_test_fdata(n, m);

        let ncomp = n.min(m);
        let fpca = fdata_to_pc_1d(&data, n, m, ncomp).unwrap();

        for i in 0..n {
            for j in 0..m {
                let mut reconstructed = 0.0;
                for k in 0..ncomp {
                    let score = fpca.scores[i + k * n];
                    let loading = fpca.rotation[j + k * m];
                    reconstructed += score * loading;
                }
                let original_centered = fpca.centered[i + j * n];
                assert!(
                    (reconstructed - original_centered).abs() < 0.1,
                    "Reconstruction error at ({}, {}): {} vs {}",
                    i,
                    j,
                    reconstructed,
                    original_centered
                );
            }
        }
    }

    #[test]
    fn test_fdata_to_pls_1d_basic() {
        let n = 20;
        let m = 30;
        let ncomp = 3;
        let (x, _) = generate_test_fdata(n, m);

        let y: Vec<f64> = (0..n).map(|i| (i as f64 / n as f64) + 0.1).collect();

        let result = fdata_to_pls_1d(&x, n, m, &y, ncomp);
        assert!(result.is_some());

        let pls = result.unwrap();
        assert_eq!(pls.weights.len(), m * ncomp);
        assert_eq!(pls.scores.len(), n * ncomp);
        assert_eq!(pls.loadings.len(), m * ncomp);
    }

    #[test]
    fn test_fdata_to_pls_1d_weights_normalized() {
        let n = 20;
        let m = 30;
        let ncomp = 2;
        let (x, _) = generate_test_fdata(n, m);
        let y: Vec<f64> = (0..n).map(|i| i as f64).collect();

        let pls = fdata_to_pls_1d(&x, n, m, &y, ncomp).unwrap();

        for k in 0..ncomp {
            let norm: f64 = (0..m)
                .map(|j| pls.weights[j + k * m].powi(2))
                .sum::<f64>()
                .sqrt();
            assert!(
                (norm - 1.0).abs() < 0.1,
                "Weight vector {} should be unit norm, got {}",
                k,
                norm
            );
        }
    }

    #[test]
    fn test_fdata_to_pls_1d_invalid_input() {
        let (x, _) = generate_test_fdata(10, 30);
        let y = vec![0.0; 10];

        // Response length does not match n.
        let result = fdata_to_pls_1d(&x, 10, 30, &[0.0; 5], 2);
        assert!(result.is_none());

        // ncomp == 0
        let result = fdata_to_pls_1d(&x, 10, 30, &y, 0);
        assert!(result.is_none());
    }

    #[test]
    fn test_ridge_regression_fit_basic() {
        let n = 50;
        let m = 5;

        let mut x = vec![0.0; n * m];
        for i in 0..n {
            for j in 0..m {
                x[i + j * n] = (i as f64 + j as f64) / (n + m) as f64;
            }
        }

        // y is (nearly) a linear combination of the columns plus noise.
        let y: Vec<f64> = (0..n)
            .map(|i| {
                let mut sum = 0.0;
                for j in 0..m {
                    sum += x[i + j * n];
                }
                sum + 0.01 * (i as f64 % 10.0)
            })
            .collect();

        let result = ridge_regression_fit(&x, &y, n, m, 0.1, true);

        assert!(result.error.is_none(), "Ridge should fit without error");
        assert_eq!(result.coefficients.len(), m);
        assert_eq!(result.fitted_values.len(), n);
        assert_eq!(result.residuals.len(), n);
    }

    #[test]
    fn test_ridge_regression_fit_r_squared() {
        let n = 50;
        let m = 3;

        let x: Vec<f64> = (0..n * m).map(|i| i as f64 / (n * m) as f64).collect();
        let y: Vec<f64> = (0..n).map(|i| i as f64 / n as f64).collect();

        let result = ridge_regression_fit(&x, &y, n, m, 0.01, true);

        assert!(
            result.r_squared > 0.5,
            "R-squared should be high, got {}",
            result.r_squared
        );
        assert!(result.r_squared <= 1.0 + 1e-10, "R-squared should be <= 1");
    }

    // Stronger regularization should shrink the coefficient norm.
    #[test]
    fn test_ridge_regression_fit_regularization() {
        let n = 30;
        let m = 10;

        let x: Vec<f64> = (0..n * m)
            .map(|i| ((i * 17) % 100) as f64 / 100.0)
            .collect();
        let y: Vec<f64> = (0..n).map(|i| (i as f64).sin()).collect();

        let low_lambda = ridge_regression_fit(&x, &y, n, m, 0.001, true);
        let high_lambda = ridge_regression_fit(&x, &y, n, m, 100.0, true);

        let norm_low: f64 = low_lambda
            .coefficients
            .iter()
            .map(|c| c.powi(2))
            .sum::<f64>()
            .sqrt();
        let norm_high: f64 = high_lambda
            .coefficients
            .iter()
            .map(|c| c.powi(2))
            .sum::<f64>()
            .sqrt();

        assert!(
            norm_high <= norm_low + 1e-6,
            "Higher lambda should shrink coefficients: {} vs {}",
            norm_high,
            norm_low
        );
    }

    // Residuals must be internally consistent with y and fitted values.
    #[test]
    fn test_ridge_regression_fit_residuals() {
        let n = 20;
        let m = 3;

        let x: Vec<f64> = (0..n * m).map(|i| i as f64 / (n * m) as f64).collect();
        let y: Vec<f64> = (0..n).map(|i| i as f64 / n as f64).collect();

        let result = ridge_regression_fit(&x, &y, n, m, 0.1, true);

        for i in 0..n {
            let expected_resid = y[i] - result.fitted_values[i];
            assert!(
                (result.residuals[i] - expected_resid).abs() < 1e-10,
                "Residual mismatch at {}",
                i
            );
        }
    }

    #[test]
    fn test_ridge_regression_fit_no_intercept() {
        let n = 30;
        let m = 5;

        let x: Vec<f64> = (0..n * m).map(|i| i as f64 / (n * m) as f64).collect();
        let y: Vec<f64> = (0..n).map(|i| i as f64 / n as f64).collect();

        let result = ridge_regression_fit(&x, &y, n, m, 0.1, false);

        assert!(result.error.is_none());
        assert!(
            result.intercept.abs() < 1e-10,
            "Intercept should be 0, got {}",
            result.intercept
        );
    }

    #[test]
    fn test_ridge_regression_fit_invalid_input() {
        // Empty problem.
        let result = ridge_regression_fit(&[], &[], 0, 5, 0.1, true);
        assert!(result.error.is_some());

        // x length does not match n * m and y length does not match n.
        let x = vec![0.0; 50];
        let y = vec![0.0; 10];
        let result = ridge_regression_fit(&x, &y, 10, 10, 0.1, true);
        assert!(result.error.is_some());
    }
}