1use crate::iter_maybe_parallel;
6#[cfg(feature = "linalg")]
7use anofox_regression::solvers::RidgeRegressor;
8#[cfg(feature = "linalg")]
9use anofox_regression::{FittedRegressor, Regressor};
10use nalgebra::{DMatrix, SVD};
11#[cfg(feature = "parallel")]
12use rayon::iter::ParallelIterator;
13
/// Result of a functional principal component analysis (FPCA) of 1-D
/// functional data. All matrices are stored column-major as flat vectors.
pub struct FpcaResult {
    /// Leading singular values (length `ncomp`), in decreasing order.
    pub singular_values: Vec<f64>,
    /// Principal component loadings, `m x ncomp`, column-major.
    pub rotation: Vec<f64>,
    /// Component scores, `n x ncomp`, column-major.
    pub scores: Vec<f64>,
    /// Column (point-wise) means of the input data, length `m`.
    pub mean: Vec<f64>,
    /// Mean-centered input data, `n x m`, column-major.
    pub centered: Vec<f64>,
}
27
28pub fn fdata_to_pc_1d(data: &[f64], n: usize, m: usize, ncomp: usize) -> Option<FpcaResult> {
36 if n == 0 || m == 0 || ncomp < 1 || data.len() != n * m {
37 return None;
38 }
39
40 let ncomp = ncomp.min(n).min(m);
41
42 let means: Vec<f64> = iter_maybe_parallel!(0..m)
44 .map(|j| {
45 let mut sum = 0.0;
46 for i in 0..n {
47 sum += data[i + j * n];
48 }
49 sum / n as f64
50 })
51 .collect();
52
53 let centered_data: Vec<f64> = (0..(n * m))
55 .map(|idx| {
56 let i = idx % n;
57 let j = idx / n;
58 data[i + j * n] - means[j]
59 })
60 .collect();
61
62 let matrix = DMatrix::from_column_slice(n, m, ¢ered_data);
64
65 let svd = SVD::new(matrix, true, true);
67
68 let singular_values: Vec<f64> = svd.singular_values.iter().take(ncomp).cloned().collect();
70
71 let v_t = svd.v_t.as_ref()?;
73 let rotation_data: Vec<f64> = (0..ncomp)
74 .flat_map(|k| (0..m).map(move |j| v_t[(k, j)]))
75 .collect();
76
77 let u = svd.u.as_ref()?;
79 let mut scores_data: Vec<f64> = Vec::with_capacity(n * ncomp);
80 for k in 0..ncomp {
81 let sv_k = singular_values[k];
82 for i in 0..n {
83 scores_data.push(u[(i, k)] * sv_k);
84 }
85 }
86
87 Some(FpcaResult {
88 singular_values,
89 rotation: rotation_data,
90 scores: scores_data,
91 mean: means,
92 centered: centered_data,
93 })
94}
95
/// Result of a partial least squares (PLS) decomposition of 1-D functional
/// data. All matrices are stored column-major as flat vectors.
pub struct PlsResult {
    /// Weight vectors, `m x ncomp`, column-major; each column has unit norm.
    pub weights: Vec<f64>,
    /// Score vectors, `n x ncomp`, column-major.
    pub scores: Vec<f64>,
    /// X-loading vectors, `m x ncomp`, column-major.
    pub loadings: Vec<f64>,
}

/// Computes a PLS decomposition of 1-D functional data against a scalar
/// response using the NIPALS deflation scheme.
///
/// `data` is the `n x m` predictor matrix in column-major order, `y` the
/// response of length `n`. `ncomp` is clamped to `min(n, m)`.
///
/// Returns `None` when dimensions are inconsistent or `ncomp < 1`.
pub fn fdata_to_pls_1d(
    data: &[f64],
    n: usize,
    m: usize,
    y: &[f64],
    ncomp: usize,
) -> Option<PlsResult> {
    let valid = n > 0 && m > 0 && y.len() == n && ncomp >= 1 && data.len() == n * m;
    if !valid {
        return None;
    }

    let ncomp = ncomp.min(n).min(m);

    // Column means of X and the mean of y, for centering.
    let x_means: Vec<f64> = (0..m)
        .map(|j| (0..n).fold(0.0, |acc, i| acc + data[i + j * n]) / n as f64)
        .collect();
    let y_mean = y.iter().sum::<f64>() / n as f64;

    // In column-major layout, flat index `idx` maps to column `idx / n`.
    let mut x_cen: Vec<f64> = (0..n * m)
        .map(|idx| data[idx] - x_means[idx / n])
        .collect();
    let mut y_cen: Vec<f64> = y.iter().map(|&v| v - y_mean).collect();

    let mut weights = vec![0.0; m * ncomp];
    let mut scores = vec![0.0; n * ncomp];
    let mut loadings = vec![0.0; m * ncomp];

    for k in 0..ncomp {
        // Weight vector w = X^T y, normalized to unit length when nonzero.
        let mut w: Vec<f64> = (0..m)
            .map(|j| (0..n).fold(0.0, |acc, i| acc + x_cen[i + j * n] * y_cen[i]))
            .collect();

        let norm = w.iter().fold(0.0, |acc, &wi| acc + wi * wi).sqrt();
        if norm > 1e-10 {
            w.iter_mut().for_each(|wi| *wi /= norm);
        }

        // Scores t = X w.
        let t: Vec<f64> = (0..n)
            .map(|i| (0..m).fold(0.0, |acc, j| acc + x_cen[i + j * n] * w[j]))
            .collect();

        let t_ss = t.iter().fold(0.0, |acc, &ti| acc + ti * ti);

        // X-loadings p = X^T t / (t^T t), guarded against a degenerate t.
        let p: Vec<f64> = (0..m)
            .map(|j| {
                let dot = (0..n).fold(0.0, |acc, i| acc + x_cen[i + j * n] * t[i]);
                dot / t_ss.max(1e-10)
            })
            .collect();

        // Store this component's vectors into the column-major outputs.
        weights[k * m..(k + 1) * m].copy_from_slice(&w);
        loadings[k * m..(k + 1) * m].copy_from_slice(&p);
        scores[k * n..(k + 1) * n].copy_from_slice(&t);

        // Deflate X: X <- X - t p^T.
        for j in 0..m {
            for i in 0..n {
                x_cen[i + j * n] -= t[i] * p[j];
            }
        }

        // Deflate y by its regression coefficient on t.
        let t_dot_y: f64 = t.iter().zip(y_cen.iter()).map(|(&ti, &yi)| ti * yi).sum();
        let q = t_dot_y / t_ss.max(1e-10);
        for (yi, &ti) in y_cen.iter_mut().zip(t.iter()) {
            *yi -= ti * q;
        }
    }

    Some(PlsResult {
        weights,
        scores,
        loadings,
    })
}
221
/// Output of `ridge_regression_fit`. On failure the numeric fields are
/// empty/zero and `error` carries a message.
#[cfg(feature = "linalg")]
pub struct RidgeResult {
    /// Fitted coefficients, length `m` (one per predictor column).
    pub coefficients: Vec<f64>,
    /// Fitted intercept; 0.0 when fitted without an intercept.
    pub intercept: f64,
    /// In-sample predictions, length `n`.
    pub fitted_values: Vec<f64>,
    /// Observed minus fitted values, length `n`.
    pub residuals: Vec<f64>,
    /// Coefficient of determination; 0.0 when the response is constant.
    pub r_squared: f64,
    /// The regularization strength that was requested.
    pub lambda: f64,
    /// `None` on success, otherwise a human-readable failure message.
    pub error: Option<String>,
}
240
/// Fits a ridge regression of `y` on the `n x m` column-major design
/// matrix `x` with regularization strength `lambda`, optionally with an
/// intercept.
///
/// Never panics on bad input: dimension errors and solver failures are
/// reported through `RidgeResult::error`.
#[cfg(feature = "linalg")]
pub fn ridge_regression_fit(
    x: &[f64],
    y: &[f64],
    n: usize,
    m: usize,
    lambda: f64,
    with_intercept: bool,
) -> RidgeResult {
    // Uniform error constructor: empty outputs plus a message.
    let failure = |msg: String| RidgeResult {
        coefficients: Vec::new(),
        intercept: 0.0,
        fitted_values: Vec::new(),
        residuals: Vec::new(),
        r_squared: 0.0,
        lambda,
        error: Some(msg),
    };

    if n == 0 || m == 0 || y.len() != n || x.len() != n * m {
        return failure("Invalid input dimensions".to_string());
    }

    // Copy the column-major slices into faer structures.
    let design = faer::Mat::from_fn(n, m, |i, j| x[i + j * n]);
    let response = faer::Col::from_fn(n, |i| y[i]);

    let model = RidgeRegressor::builder()
        .with_intercept(with_intercept)
        .lambda(lambda)
        .build();

    let fitted = match model.fit(&design, &response) {
        Ok(f) => f,
        Err(e) => return failure(format!("Fit failed: {:?}", e)),
    };

    let beta = fitted.coefficients();
    let coefficients: Vec<f64> = (0..beta.nrows()).map(|i| beta[i]).collect();
    let intercept = fitted.intercept().unwrap_or(0.0);

    // In-sample predictions recomputed from the recovered coefficients.
    let fitted_values: Vec<f64> = (0..n)
        .map(|i| (0..m).fold(intercept, |acc, j| acc + x[i + j * n] * coefficients[j]))
        .collect();

    let residuals: Vec<f64> = y
        .iter()
        .zip(fitted_values.iter())
        .map(|(&obs, &pred)| obs - pred)
        .collect();

    // R^2 = 1 - SS_res / SS_tot, defined as 0 for a constant response.
    let y_mean = y.iter().sum::<f64>() / n as f64;
    let ss_tot: f64 = y.iter().map(|&v| (v - y_mean).powi(2)).sum();
    let ss_res: f64 = residuals.iter().map(|&r| r.powi(2)).sum();
    let r_squared = if ss_tot > 0.0 {
        1.0 - ss_res / ss_tot
    } else {
        0.0
    };

    RidgeResult {
        coefficients,
        intercept,
        fitted_values,
        residuals,
        r_squared,
        lambda,
        error: None,
    }
}
340
#[cfg(test)]
mod tests {
    use super::*;
    use std::f64::consts::PI;

    /// Builds an `n x m` column-major matrix of phase-shifted sine curves
    /// sampled on a regular grid over [0, 1]; returns (data, grid points).
    fn generate_test_fdata(n: usize, m: usize) -> (Vec<f64>, Vec<f64>) {
        let t: Vec<f64> = (0..m).map(|j| j as f64 / (m - 1) as f64).collect();

        let mut data = vec![0.0; n * m];
        for i in 0..n {
            // Each curve gets a phase shift proportional to its row index.
            let phase = (i as f64 / n as f64) * PI;
            for j in 0..m {
                data[i + j * n] = (2.0 * PI * t[j] + phase).sin();
            }
        }

        (data, t)
    }

    // --- FPCA ---

    /// Output vectors have the documented sizes.
    #[test]
    fn test_fdata_to_pc_1d_basic() {
        let n = 20;
        let m = 50;
        let ncomp = 3;
        let (data, _) = generate_test_fdata(n, m);

        let result = fdata_to_pc_1d(&data, n, m, ncomp);
        assert!(result.is_some());

        let fpca = result.unwrap();
        assert_eq!(fpca.singular_values.len(), ncomp);
        assert_eq!(fpca.rotation.len(), m * ncomp);
        assert_eq!(fpca.scores.len(), n * ncomp);
        assert_eq!(fpca.mean.len(), m);
        assert_eq!(fpca.centered.len(), n * m);
    }

    /// SVD singular values must come out in non-increasing order.
    #[test]
    fn test_fdata_to_pc_1d_singular_values_decreasing() {
        let n = 20;
        let m = 50;
        let ncomp = 5;
        let (data, _) = generate_test_fdata(n, m);

        let fpca = fdata_to_pc_1d(&data, n, m, ncomp).unwrap();

        // Small tolerance guards against ties represented with FP noise.
        for i in 1..fpca.singular_values.len() {
            assert!(
                fpca.singular_values[i] <= fpca.singular_values[i - 1] + 1e-10,
                "Singular values should be decreasing"
            );
        }
    }

    /// After centering, every column of `centered` averages to ~0.
    #[test]
    fn test_fdata_to_pc_1d_centered_has_zero_mean() {
        let n = 20;
        let m = 50;
        let (data, _) = generate_test_fdata(n, m);

        let fpca = fdata_to_pc_1d(&data, n, m, 3).unwrap();

        for j in 0..m {
            let col_mean: f64 = (0..n).map(|i| fpca.centered[i + j * n]).sum::<f64>() / n as f64;
            assert!(
                col_mean.abs() < 1e-10,
                "Centered data should have zero column mean"
            );
        }
    }

    /// `ncomp` is clamped to min(n, m) rather than rejected.
    #[test]
    fn test_fdata_to_pc_1d_ncomp_limits() {
        let n = 10;
        let m = 50;
        let (data, _) = generate_test_fdata(n, m);

        // Request more components than min(n, m) = 10.
        let fpca = fdata_to_pc_1d(&data, n, m, 20).unwrap();
        assert!(fpca.singular_values.len() <= n);
    }

    /// Invalid dimensions and ncomp == 0 all yield `None`.
    #[test]
    fn test_fdata_to_pc_1d_invalid_input() {
        let result = fdata_to_pc_1d(&[], 0, 50, 3);
        assert!(result.is_none());

        // data.len() (100) does not match n * m (200).
        let data = vec![0.0; 100];
        let result = fdata_to_pc_1d(&data, 10, 20, 3);
        assert!(result.is_none());

        let (data, _) = generate_test_fdata(10, 50);
        let result = fdata_to_pc_1d(&data, 10, 50, 0);
        assert!(result.is_none());
    }

    /// With a full set of components, scores x rotation^T should
    /// reconstruct the centered data.
    #[test]
    fn test_fdata_to_pc_1d_reconstruction() {
        let n = 10;
        let m = 30;
        let (data, _) = generate_test_fdata(n, m);

        let ncomp = n.min(m);
        let fpca = fdata_to_pc_1d(&data, n, m, ncomp).unwrap();

        for i in 0..n {
            for j in 0..m {
                let mut reconstructed = 0.0;
                for k in 0..ncomp {
                    let score = fpca.scores[i + k * n];
                    let loading = fpca.rotation[j + k * m];
                    reconstructed += score * loading;
                }
                let original_centered = fpca.centered[i + j * n];
                assert!(
                    (reconstructed - original_centered).abs() < 0.1,
                    "Reconstruction error at ({}, {}): {} vs {}",
                    i,
                    j,
                    reconstructed,
                    original_centered
                );
            }
        }
    }

    // --- PLS ---

    /// Output vectors have the documented sizes.
    #[test]
    fn test_fdata_to_pls_1d_basic() {
        let n = 20;
        let m = 30;
        let ncomp = 3;
        let (x, _) = generate_test_fdata(n, m);

        // A simple increasing response.
        let y: Vec<f64> = (0..n).map(|i| (i as f64 / n as f64) + 0.1).collect();

        let result = fdata_to_pls_1d(&x, n, m, &y, ncomp);
        assert!(result.is_some());

        let pls = result.unwrap();
        assert_eq!(pls.weights.len(), m * ncomp);
        assert_eq!(pls.scores.len(), n * ncomp);
        assert_eq!(pls.loadings.len(), m * ncomp);
    }

    /// Each weight column should be normalized to unit Euclidean norm.
    #[test]
    fn test_fdata_to_pls_1d_weights_normalized() {
        let n = 20;
        let m = 30;
        let ncomp = 2;
        let (x, _) = generate_test_fdata(n, m);
        let y: Vec<f64> = (0..n).map(|i| i as f64).collect();

        let pls = fdata_to_pls_1d(&x, n, m, &y, ncomp).unwrap();

        for k in 0..ncomp {
            let norm: f64 = (0..m)
                .map(|j| pls.weights[j + k * m].powi(2))
                .sum::<f64>()
                .sqrt();
            assert!(
                (norm - 1.0).abs() < 0.1,
                "Weight vector {} should be unit norm, got {}",
                k,
                norm
            );
        }
    }

    /// Mismatched y length and ncomp == 0 both yield `None`.
    #[test]
    fn test_fdata_to_pls_1d_invalid_input() {
        let (x, _) = generate_test_fdata(10, 30);
        let y = vec![0.0; 10];

        let result = fdata_to_pls_1d(&x, 10, 30, &[0.0; 5], 2);
        assert!(result.is_none());

        let result = fdata_to_pls_1d(&x, 10, 30, &y, 0);
        assert!(result.is_none());
    }

    // --- Ridge regression (requires the `linalg` feature) ---

    /// A well-posed fit returns no error and correctly sized outputs.
    #[test]
    fn test_ridge_regression_fit_basic() {
        let n = 50;
        let m = 5;

        // Deterministic, mildly collinear design matrix.
        let mut x = vec![0.0; n * m];
        for i in 0..n {
            for j in 0..m {
                x[i + j * n] = (i as f64 + j as f64) / (n + m) as f64;
            }
        }

        // y = row sums of x plus a small deterministic perturbation.
        let y: Vec<f64> = (0..n)
            .map(|i| {
                let mut sum = 0.0;
                for j in 0..m {
                    sum += x[i + j * n];
                }
                sum + 0.01 * (i as f64 % 10.0)
            })
            .collect();

        let result = ridge_regression_fit(&x, &y, n, m, 0.1, true);

        assert!(result.error.is_none(), "Ridge should fit without error");
        assert_eq!(result.coefficients.len(), m);
        assert_eq!(result.fitted_values.len(), n);
        assert_eq!(result.residuals.len(), n);
    }

    /// R^2 should be high for a near-linear relationship and never exceed 1.
    #[test]
    fn test_ridge_regression_fit_r_squared() {
        let n = 50;
        let m = 3;

        let x: Vec<f64> = (0..n * m).map(|i| i as f64 / (n * m) as f64).collect();
        let y: Vec<f64> = (0..n).map(|i| i as f64 / n as f64).collect();

        let result = ridge_regression_fit(&x, &y, n, m, 0.01, true);

        assert!(
            result.r_squared > 0.5,
            "R-squared should be high, got {}",
            result.r_squared
        );
        assert!(result.r_squared <= 1.0 + 1e-10, "R-squared should be <= 1");
    }

    /// A larger lambda must shrink the coefficient norm.
    #[test]
    fn test_ridge_regression_fit_regularization() {
        let n = 30;
        let m = 10;

        // Pseudo-random but deterministic design and response.
        let x: Vec<f64> = (0..n * m)
            .map(|i| ((i * 17) % 100) as f64 / 100.0)
            .collect();
        let y: Vec<f64> = (0..n).map(|i| (i as f64).sin()).collect();

        let low_lambda = ridge_regression_fit(&x, &y, n, m, 0.001, true);
        let high_lambda = ridge_regression_fit(&x, &y, n, m, 100.0, true);

        // Compare Euclidean norms of the two coefficient vectors.
        let norm_low: f64 = low_lambda
            .coefficients
            .iter()
            .map(|c| c.powi(2))
            .sum::<f64>()
            .sqrt();
        let norm_high: f64 = high_lambda
            .coefficients
            .iter()
            .map(|c| c.powi(2))
            .sum::<f64>()
            .sqrt();

        assert!(
            norm_high <= norm_low + 1e-6,
            "Higher lambda should shrink coefficients: {} vs {}",
            norm_high,
            norm_low
        );
    }

    /// residuals[i] must equal y[i] - fitted_values[i].
    #[test]
    fn test_ridge_regression_fit_residuals() {
        let n = 20;
        let m = 3;

        let x: Vec<f64> = (0..n * m).map(|i| i as f64 / (n * m) as f64).collect();
        let y: Vec<f64> = (0..n).map(|i| i as f64 / n as f64).collect();

        let result = ridge_regression_fit(&x, &y, n, m, 0.1, true);

        for i in 0..n {
            let expected_resid = y[i] - result.fitted_values[i];
            assert!(
                (result.residuals[i] - expected_resid).abs() < 1e-10,
                "Residual mismatch at {}",
                i
            );
        }
    }

    /// With `with_intercept = false` the reported intercept is exactly 0.
    #[test]
    fn test_ridge_regression_fit_no_intercept() {
        let n = 30;
        let m = 5;

        let x: Vec<f64> = (0..n * m).map(|i| i as f64 / (n * m) as f64).collect();
        let y: Vec<f64> = (0..n).map(|i| i as f64 / n as f64).collect();

        let result = ridge_regression_fit(&x, &y, n, m, 0.1, false);

        assert!(result.error.is_none());
        assert!(
            result.intercept.abs() < 1e-10,
            "Intercept should be 0, got {}",
            result.intercept
        );
    }

    /// Dimension errors are reported via `error`, not panics.
    #[test]
    fn test_ridge_regression_fit_invalid_input() {
        let result = ridge_regression_fit(&[], &[], 0, 5, 0.1, true);
        assert!(result.error.is_some());

        // x.len() (50) does not match n * m (100); y length also wrong.
        let x = vec![0.0; 50];
        let y = vec![0.0; 10];
        let result = ridge_regression_fit(&x, &y, 10, 10, 0.1, true);
        assert!(result.error.is_some());
    }
}