Skip to main content

fdars_core/
regression.rs

1//! Regression functions for functional data.
2//!
3//! This module provides functional PCA, PLS, and ridge regression.
4
5use crate::iter_maybe_parallel;
6use crate::matrix::FdMatrix;
7#[cfg(feature = "linalg")]
8use anofox_regression::solvers::RidgeRegressor;
9#[cfg(feature = "linalg")]
10use anofox_regression::{FittedRegressor, Regressor};
11use nalgebra::SVD;
12#[cfg(feature = "parallel")]
13use rayon::iter::ParallelIterator;
14
/// Result of functional PCA.
pub struct FpcaResult {
    /// Leading singular values of the centered data matrix, largest first
    /// (length = number of extracted components).
    pub singular_values: Vec<f64>,
    /// Rotation matrix (loadings), m x ncomp: column k holds the k-th
    /// principal direction over the m evaluation points.
    pub rotation: FdMatrix,
    /// Scores matrix, n x ncomp: per-observation projections (U * S).
    pub scores: FdMatrix,
    /// Mean function: the pointwise column means subtracted before the SVD
    /// (length m).
    pub mean: Vec<f64>,
    /// Centered data, n x m: the input with column means removed.
    pub centered: FdMatrix,
}
28
29/// Center columns of a matrix and return (centered_matrix, column_means).
30fn center_columns(data: &FdMatrix) -> (FdMatrix, Vec<f64>) {
31    let (n, m) = data.shape();
32    let means: Vec<f64> = iter_maybe_parallel!(0..m)
33        .map(|j| {
34            let col = data.column(j);
35            let sum: f64 = col.iter().sum();
36            sum / n as f64
37        })
38        .collect();
39
40    let mut centered = FdMatrix::zeros(n, m);
41    for j in 0..m {
42        for i in 0..n {
43            centered[(i, j)] = data[(i, j)] - means[j];
44        }
45    }
46    (centered, means)
47}
48
49/// Extract rotation (V) and scores (U*S) from SVD results.
50fn extract_pc_components(
51    svd: &SVD<f64, nalgebra::Dyn, nalgebra::Dyn>,
52    n: usize,
53    m: usize,
54    ncomp: usize,
55) -> Option<(Vec<f64>, FdMatrix, FdMatrix)> {
56    let singular_values: Vec<f64> = svd.singular_values.iter().take(ncomp).cloned().collect();
57
58    let v_t = svd.v_t.as_ref()?;
59    let mut rotation = FdMatrix::zeros(m, ncomp);
60    for k in 0..ncomp {
61        for j in 0..m {
62            rotation[(j, k)] = v_t[(k, j)];
63        }
64    }
65
66    let u = svd.u.as_ref()?;
67    let mut scores = FdMatrix::zeros(n, ncomp);
68    for k in 0..ncomp {
69        let sv_k = singular_values[k];
70        for i in 0..n {
71            scores[(i, k)] = u[(i, k)] * sv_k;
72        }
73    }
74
75    Some((singular_values, rotation, scores))
76}
77
78/// Perform functional PCA via SVD on centered data.
79///
80/// # Arguments
81/// * `data` - Matrix (n x m): n observations, m evaluation points
82/// * `ncomp` - Number of components to extract
83pub fn fdata_to_pc_1d(data: &FdMatrix, ncomp: usize) -> Option<FpcaResult> {
84    let (n, m) = data.shape();
85    if n == 0 || m == 0 || ncomp < 1 {
86        return None;
87    }
88
89    let ncomp = ncomp.min(n).min(m);
90    let (centered, means) = center_columns(data);
91    let svd = SVD::new(centered.to_dmatrix(), true, true);
92    let (singular_values, rotation, scores) = extract_pc_components(&svd, n, m, ncomp)?;
93
94    Some(FpcaResult {
95        singular_values,
96        rotation,
97        scores,
98        mean: means,
99        centered,
100    })
101}
102
/// Result of PLS regression.
pub struct PlsResult {
    /// Weight vectors, m x ncomp: column k is the (unit-norm, when nondegenerate)
    /// weight vector of component k.
    pub weights: FdMatrix,
    /// Score vectors, n x ncomp: column k is t_k = X_k w_k (X deflated per step).
    pub scores: FdMatrix,
    /// Loading vectors, m x ncomp: column k is p_k = X_k' t_k / (t_k' t_k).
    pub loadings: FdMatrix,
}
112
113/// Compute PLS weight vector: w = X'y / ||X'y||
114fn pls_compute_weights(x_cen: &FdMatrix, y_cen: &[f64]) -> Vec<f64> {
115    let (n, m) = x_cen.shape();
116    let mut w: Vec<f64> = (0..m)
117        .map(|j| {
118            let mut sum = 0.0;
119            for i in 0..n {
120                sum += x_cen[(i, j)] * y_cen[i];
121            }
122            sum
123        })
124        .collect();
125
126    let w_norm: f64 = w.iter().map(|&wi| wi * wi).sum::<f64>().sqrt();
127    if w_norm > 1e-10 {
128        for wi in &mut w {
129            *wi /= w_norm;
130        }
131    }
132    w
133}
134
135/// Compute PLS scores: t = Xw
136fn pls_compute_scores(x_cen: &FdMatrix, w: &[f64]) -> Vec<f64> {
137    let (n, m) = x_cen.shape();
138    (0..n)
139        .map(|i| {
140            let mut sum = 0.0;
141            for j in 0..m {
142                sum += x_cen[(i, j)] * w[j];
143            }
144            sum
145        })
146        .collect()
147}
148
149/// Compute PLS loadings: p = X't / (t't)
150fn pls_compute_loadings(x_cen: &FdMatrix, t: &[f64], t_norm_sq: f64) -> Vec<f64> {
151    let (n, m) = x_cen.shape();
152    (0..m)
153        .map(|j| {
154            let mut sum = 0.0;
155            for i in 0..n {
156                sum += x_cen[(i, j)] * t[i];
157            }
158            sum / t_norm_sq.max(1e-10)
159        })
160        .collect()
161}
162
163/// Deflate X by removing the rank-1 component t * p'
164fn pls_deflate_x(x_cen: &mut FdMatrix, t: &[f64], p: &[f64]) {
165    let (n, m) = x_cen.shape();
166    for j in 0..m {
167        for i in 0..n {
168            x_cen[(i, j)] -= t[i] * p[j];
169        }
170    }
171}
172
/// Execute one NIPALS step: compute weights/scores/loadings and deflate X and y.
///
/// Writes column `k` of `weights`, `scores`, and `loadings`, then removes the
/// extracted rank-1 component from `x_cen` and the part of `y_cen` explained
/// by the scores, so the next step operates on the residuals.
fn pls_nipals_step(
    k: usize,
    x_cen: &mut FdMatrix,
    y_cen: &mut [f64],
    weights: &mut FdMatrix,
    scores: &mut FdMatrix,
    loadings: &mut FdMatrix,
) {
    let n = x_cen.nrows();
    let m = x_cen.ncols();

    // w = X'y / ||X'y||, t = Xw, p = X't / (t't).
    let w = pls_compute_weights(x_cen, y_cen);
    let t = pls_compute_scores(x_cen, &w);
    let t_norm_sq: f64 = t.iter().map(|&ti| ti * ti).sum();
    let p = pls_compute_loadings(x_cen, &t, t_norm_sq);

    // Store this component's vectors in column k of the output matrices.
    for j in 0..m {
        weights[(j, k)] = w[j];
        loadings[(j, k)] = p[j];
    }
    for i in 0..n {
        scores[(i, k)] = t[i];
    }

    // Deflate X: remove the rank-1 approximation t * p'.
    pls_deflate_x(x_cen, &t, &p);
    // Deflate y: subtract its regression on t, q = t'y / (t't), with the
    // denominator guarded against a numerically zero score vector.
    let t_y: f64 = t.iter().zip(y_cen.iter()).map(|(&ti, &yi)| ti * yi).sum();
    let q = t_y / t_norm_sq.max(1e-10);
    for i in 0..n {
        y_cen[i] -= t[i] * q;
    }
}
205
206/// Perform PLS via NIPALS algorithm.
207///
208/// # Arguments
209/// * `data` - Matrix (n x m): n observations, m evaluation points
210/// * `y` - Response vector (length n)
211/// * `ncomp` - Number of components to extract
212pub fn fdata_to_pls_1d(data: &FdMatrix, y: &[f64], ncomp: usize) -> Option<PlsResult> {
213    let (n, m) = data.shape();
214    if n == 0 || m == 0 || y.len() != n || ncomp < 1 {
215        return None;
216    }
217
218    let ncomp = ncomp.min(n).min(m);
219
220    // Center X and y
221    let x_means: Vec<f64> = (0..m)
222        .map(|j| {
223            let col = data.column(j);
224            let sum: f64 = col.iter().sum();
225            sum / n as f64
226        })
227        .collect();
228
229    let y_mean: f64 = y.iter().sum::<f64>() / n as f64;
230
231    let mut x_cen = FdMatrix::zeros(n, m);
232    for j in 0..m {
233        for i in 0..n {
234            x_cen[(i, j)] = data[(i, j)] - x_means[j];
235        }
236    }
237
238    let mut y_cen: Vec<f64> = y.iter().map(|&yi| yi - y_mean).collect();
239
240    let mut weights = FdMatrix::zeros(m, ncomp);
241    let mut scores = FdMatrix::zeros(n, ncomp);
242    let mut loadings = FdMatrix::zeros(m, ncomp);
243
244    // NIPALS algorithm
245    for k in 0..ncomp {
246        pls_nipals_step(
247            k,
248            &mut x_cen,
249            &mut y_cen,
250            &mut weights,
251            &mut scores,
252            &mut loadings,
253        );
254    }
255
256    Some(PlsResult {
257        weights,
258        scores,
259        loadings,
260    })
261}
262
/// Result of ridge regression fit.
#[cfg(feature = "linalg")]
pub struct RidgeResult {
    /// Coefficients, one per predictor column (empty when `error` is set).
    pub coefficients: Vec<f64>,
    /// Intercept (0.0 when fit without intercept or on error).
    pub intercept: f64,
    /// Fitted values, one per observation (empty when `error` is set).
    pub fitted_values: Vec<f64>,
    /// Residuals y - y_hat, one per observation (empty when `error` is set).
    pub residuals: Vec<f64>,
    /// R-squared: 1 - SS_res / SS_tot (0.0 when SS_tot is zero or on error).
    pub r_squared: f64,
    /// Lambda (regularization strength) used for the fit.
    pub lambda: f64,
    /// Error message if the fit failed; `None` on success.
    pub error: Option<String>,
}
281
/// Build a `RidgeResult` describing a failed fit (all numeric outputs empty/zero).
#[cfg(feature = "linalg")]
fn ridge_error(lambda: f64, message: String) -> RidgeResult {
    RidgeResult {
        coefficients: Vec::new(),
        intercept: 0.0,
        fitted_values: Vec::new(),
        residuals: Vec::new(),
        r_squared: 0.0,
        lambda,
        error: Some(message),
    }
}

/// Fit ridge regression.
///
/// # Arguments
/// * `x` - Predictor matrix (n x m)
/// * `y` - Response vector
/// * `lambda` - Regularization parameter
/// * `with_intercept` - Whether to include intercept
///
/// Failures (bad dimensions, solver error) are reported through the
/// `error` field of the returned `RidgeResult` rather than by panicking.
#[cfg(feature = "linalg")]
pub fn ridge_regression_fit(
    x: &FdMatrix,
    y: &[f64],
    lambda: f64,
    with_intercept: bool,
) -> RidgeResult {
    let (n, m) = x.shape();
    if n == 0 || m == 0 || y.len() != n {
        return ridge_error(lambda, "Invalid input dimensions".to_string());
    }

    // Convert to faer Mat format expected by the solver.
    let x_faer = faer::Mat::from_fn(n, m, |i, j| x[(i, j)]);
    let y_faer = faer::Col::from_fn(n, |i| y[i]);

    // Build and fit the ridge regressor.
    let regressor = RidgeRegressor::builder()
        .with_intercept(with_intercept)
        .lambda(lambda)
        .build();

    let fitted = match regressor.fit(&x_faer, &y_faer) {
        Ok(f) => f,
        Err(e) => return ridge_error(lambda, format!("Fit failed: {:?}", e)),
    };

    // Extract coefficients.
    let coefs = fitted.coefficients();
    let coefficients: Vec<f64> = (0..coefs.nrows()).map(|i| coefs[i]).collect();

    // Intercept is absent when fit without one; report 0.0 in that case.
    let intercept = fitted.intercept().unwrap_or(0.0);

    // Fitted values: y_hat_i = intercept + <row i of x, coefficients>.
    let mut fitted_values = vec![0.0; n];
    for i in 0..n {
        let mut pred = intercept;
        for j in 0..m {
            pred += x[(i, j)] * coefficients[j];
        }
        fitted_values[i] = pred;
    }

    // Residuals: y - y_hat.
    let residuals: Vec<f64> = y
        .iter()
        .zip(fitted_values.iter())
        .map(|(&yi, &yhat)| yi - yhat)
        .collect();

    // R-squared = 1 - SS_res / SS_tot; defined as 0.0 for constant y
    // (SS_tot == 0) to avoid dividing by zero.
    let y_mean: f64 = y.iter().sum::<f64>() / n as f64;
    let ss_tot: f64 = y.iter().map(|&yi| (yi - y_mean).powi(2)).sum();
    let ss_res: f64 = residuals.iter().map(|&r| r.powi(2)).sum();
    let r_squared = if ss_tot > 0.0 {
        1.0 - ss_res / ss_tot
    } else {
        0.0
    };

    RidgeResult {
        coefficients,
        intercept,
        fitted_values,
        residuals,
        r_squared,
        lambda,
        error: None,
    }
}
378
#[cfg(test)]
mod tests {
    use super::*;
    use std::f64::consts::PI;

    /// Generate functional data with known structure for testing.
    ///
    /// Returns `n` sine curves sampled at `m` points on [0, 1], each with a
    /// distinct phase shift, plus the evaluation grid.
    /// NOTE(review): divides by (m - 1), so callers must pass m >= 2.
    fn generate_test_fdata(n: usize, m: usize) -> (FdMatrix, Vec<f64>) {
        let t: Vec<f64> = (0..m).map(|j| j as f64 / (m - 1) as f64).collect();

        // Create n curves: sine waves with varying phase
        let mut data = FdMatrix::zeros(n, m);
        for i in 0..n {
            let phase = (i as f64 / n as f64) * PI;
            for j in 0..m {
                data[(i, j)] = (2.0 * PI * t[j] + phase).sin();
            }
        }

        (data, t)
    }

    // ============== FPCA tests ==============

    // Shapes of every FPCA output must match (n, m, ncomp).
    #[test]
    fn test_fdata_to_pc_1d_basic() {
        let n = 20;
        let m = 50;
        let ncomp = 3;
        let (data, _) = generate_test_fdata(n, m);

        let result = fdata_to_pc_1d(&data, ncomp);
        assert!(result.is_some());

        let fpca = result.unwrap();
        assert_eq!(fpca.singular_values.len(), ncomp);
        assert_eq!(fpca.rotation.shape(), (m, ncomp));
        assert_eq!(fpca.scores.shape(), (n, ncomp));
        assert_eq!(fpca.mean.len(), m);
        assert_eq!(fpca.centered.shape(), (n, m));
    }

    // SVD convention: singular values come back sorted in decreasing order.
    #[test]
    fn test_fdata_to_pc_1d_singular_values_decreasing() {
        let n = 20;
        let m = 50;
        let ncomp = 5;
        let (data, _) = generate_test_fdata(n, m);

        let fpca = fdata_to_pc_1d(&data, ncomp).unwrap();

        // Singular values should be in decreasing order
        for i in 1..fpca.singular_values.len() {
            assert!(
                fpca.singular_values[i] <= fpca.singular_values[i - 1] + 1e-10,
                "Singular values should be decreasing"
            );
        }
    }

    // Centering invariant: every column of `centered` sums to ~0.
    #[test]
    fn test_fdata_to_pc_1d_centered_has_zero_mean() {
        let n = 20;
        let m = 50;
        let (data, _) = generate_test_fdata(n, m);

        let fpca = fdata_to_pc_1d(&data, 3).unwrap();

        // Column means of centered data should be zero
        for j in 0..m {
            let col_mean: f64 = (0..n).map(|i| fpca.centered[(i, j)]).sum::<f64>() / n as f64;
            assert!(
                col_mean.abs() < 1e-10,
                "Centered data should have zero column mean"
            );
        }
    }

    // ncomp is silently capped at min(n, m) rather than erroring.
    #[test]
    fn test_fdata_to_pc_1d_ncomp_limits() {
        let n = 10;
        let m = 50;
        let (data, _) = generate_test_fdata(n, m);

        // Request more components than n - should cap at n
        let fpca = fdata_to_pc_1d(&data, 20).unwrap();
        assert!(fpca.singular_values.len() <= n);
    }

    // Degenerate inputs (empty matrix, ncomp == 0) return None.
    #[test]
    fn test_fdata_to_pc_1d_invalid_input() {
        // Empty data
        let empty = FdMatrix::zeros(0, 50);
        let result = fdata_to_pc_1d(&empty, 3);
        assert!(result.is_none());

        // Zero components
        let (data, _) = generate_test_fdata(10, 50);
        let result = fdata_to_pc_1d(&data, 0);
        assert!(result.is_none());
    }

    // With the full set of components, scores * rotation^T should
    // reproduce the centered data (up to numerical tolerance).
    #[test]
    fn test_fdata_to_pc_1d_reconstruction() {
        let n = 10;
        let m = 30;
        let (data, _) = generate_test_fdata(n, m);

        // Use all components for perfect reconstruction
        let ncomp = n.min(m);
        let fpca = fdata_to_pc_1d(&data, ncomp).unwrap();

        // Reconstruct: X_centered = scores * rotation^T
        for i in 0..n {
            for j in 0..m {
                let mut reconstructed = 0.0;
                for k in 0..ncomp {
                    let score = fpca.scores[(i, k)];
                    let loading = fpca.rotation[(j, k)];
                    reconstructed += score * loading;
                }
                let original_centered = fpca.centered[(i, j)];
                assert!(
                    (reconstructed - original_centered).abs() < 0.1,
                    "Reconstruction error at ({}, {}): {} vs {}",
                    i,
                    j,
                    reconstructed,
                    original_centered
                );
            }
        }
    }

    // ============== PLS tests ==============

    // Shapes of every PLS output must match (n, m, ncomp).
    #[test]
    fn test_fdata_to_pls_1d_basic() {
        let n = 20;
        let m = 30;
        let ncomp = 3;
        let (x, _) = generate_test_fdata(n, m);

        // Create y with some relationship to x
        let y: Vec<f64> = (0..n).map(|i| (i as f64 / n as f64) + 0.1).collect();

        let result = fdata_to_pls_1d(&x, &y, ncomp);
        assert!(result.is_some());

        let pls = result.unwrap();
        assert_eq!(pls.weights.shape(), (m, ncomp));
        assert_eq!(pls.scores.shape(), (n, ncomp));
        assert_eq!(pls.loadings.shape(), (m, ncomp));
    }

    // NIPALS normalizes each weight vector, so its 2-norm should be ~1.
    #[test]
    fn test_fdata_to_pls_1d_weights_normalized() {
        let n = 20;
        let m = 30;
        let ncomp = 2;
        let (x, _) = generate_test_fdata(n, m);
        let y: Vec<f64> = (0..n).map(|i| i as f64).collect();

        let pls = fdata_to_pls_1d(&x, &y, ncomp).unwrap();

        // Weight vectors should be approximately unit norm
        for k in 0..ncomp {
            let norm: f64 = (0..m)
                .map(|j| pls.weights[(j, k)].powi(2))
                .sum::<f64>()
                .sqrt();
            assert!(
                (norm - 1.0).abs() < 0.1,
                "Weight vector {} should be unit norm, got {}",
                k,
                norm
            );
        }
    }

    // Degenerate inputs (y length mismatch, ncomp == 0) return None.
    #[test]
    fn test_fdata_to_pls_1d_invalid_input() {
        let (x, _) = generate_test_fdata(10, 30);

        // Wrong y length
        let result = fdata_to_pls_1d(&x, &[0.0; 5], 2);
        assert!(result.is_none());

        // Zero components
        let y = vec![0.0; 10];
        let result = fdata_to_pls_1d(&x, &y, 0);
        assert!(result.is_none());
    }

    // ============== Ridge regression tests ==============

    // Happy path: fit succeeds and output lengths match (n, m).
    #[cfg(feature = "linalg")]
    #[test]
    fn test_ridge_regression_fit_basic() {
        let n = 50;
        let m = 5;

        // Create X with known structure
        let mut x = FdMatrix::zeros(n, m);
        for i in 0..n {
            for j in 0..m {
                x[(i, j)] = (i as f64 + j as f64) / (n + m) as f64;
            }
        }

        // Create y = sum of x columns + noise
        let y: Vec<f64> = (0..n)
            .map(|i| {
                let mut sum = 0.0;
                for j in 0..m {
                    sum += x[(i, j)];
                }
                sum + 0.01 * (i as f64 % 10.0)
            })
            .collect();

        let result = ridge_regression_fit(&x, &y, 0.1, true);

        assert!(result.error.is_none(), "Ridge should fit without error");
        assert_eq!(result.coefficients.len(), m);
        assert_eq!(result.fitted_values.len(), n);
        assert_eq!(result.residuals.len(), n);
    }

    // On a near-linear response, R^2 should be high and never exceed 1.
    #[cfg(feature = "linalg")]
    #[test]
    fn test_ridge_regression_fit_r_squared() {
        let n = 50;
        let m = 3;

        let x = FdMatrix::from_column_major(
            (0..n * m).map(|i| i as f64 / (n * m) as f64).collect(),
            n,
            m,
        )
        .unwrap();
        let y: Vec<f64> = (0..n).map(|i| i as f64 / n as f64).collect();

        let result = ridge_regression_fit(&x, &y, 0.01, true);

        assert!(
            result.r_squared > 0.5,
            "R-squared should be high, got {}",
            result.r_squared
        );
        assert!(result.r_squared <= 1.0 + 1e-10, "R-squared should be <= 1");
    }

    // Ridge property: larger lambda shrinks the coefficient norm.
    #[cfg(feature = "linalg")]
    #[test]
    fn test_ridge_regression_fit_regularization() {
        let n = 30;
        let m = 10;

        let x = FdMatrix::from_column_major(
            (0..n * m)
                .map(|i| ((i * 17) % 100) as f64 / 100.0)
                .collect(),
            n,
            m,
        )
        .unwrap();
        let y: Vec<f64> = (0..n).map(|i| (i as f64).sin()).collect();

        let low_lambda = ridge_regression_fit(&x, &y, 0.001, true);
        let high_lambda = ridge_regression_fit(&x, &y, 100.0, true);

        let norm_low: f64 = low_lambda
            .coefficients
            .iter()
            .map(|c| c.powi(2))
            .sum::<f64>()
            .sqrt();
        let norm_high: f64 = high_lambda
            .coefficients
            .iter()
            .map(|c| c.powi(2))
            .sum::<f64>()
            .sqrt();

        assert!(
            norm_high <= norm_low + 1e-6,
            "Higher lambda should shrink coefficients: {} vs {}",
            norm_high,
            norm_low
        );
    }

    // Internal consistency: residuals must equal y - fitted_values exactly.
    #[cfg(feature = "linalg")]
    #[test]
    fn test_ridge_regression_fit_residuals() {
        let n = 20;
        let m = 3;

        let x = FdMatrix::from_column_major(
            (0..n * m).map(|i| i as f64 / (n * m) as f64).collect(),
            n,
            m,
        )
        .unwrap();
        let y: Vec<f64> = (0..n).map(|i| i as f64 / n as f64).collect();

        let result = ridge_regression_fit(&x, &y, 0.1, true);

        for i in 0..n {
            let expected_resid = y[i] - result.fitted_values[i];
            assert!(
                (result.residuals[i] - expected_resid).abs() < 1e-10,
                "Residual mismatch at {}",
                i
            );
        }
    }

    // with_intercept = false must report a zero intercept.
    #[cfg(feature = "linalg")]
    #[test]
    fn test_ridge_regression_fit_no_intercept() {
        let n = 30;
        let m = 5;

        let x = FdMatrix::from_column_major(
            (0..n * m).map(|i| i as f64 / (n * m) as f64).collect(),
            n,
            m,
        )
        .unwrap();
        let y: Vec<f64> = (0..n).map(|i| i as f64 / n as f64).collect();

        let result = ridge_regression_fit(&x, &y, 0.1, false);

        assert!(result.error.is_none());
        assert!(
            result.intercept.abs() < 1e-10,
            "Intercept should be 0, got {}",
            result.intercept
        );
    }

    // Bad dimensions are reported via the `error` field, not a panic.
    #[cfg(feature = "linalg")]
    #[test]
    fn test_ridge_regression_fit_invalid_input() {
        let empty = FdMatrix::zeros(0, 5);
        let result = ridge_regression_fit(&empty, &[], 0.1, true);
        assert!(result.error.is_some());

        let x = FdMatrix::zeros(10, 10);
        let y = vec![0.0; 5];
        let result = ridge_regression_fit(&x, &y, 0.1, true);
        assert!(result.error.is_some());
    }
}