// fdars_core/regression.rs

//! Regression functions for functional data.
//!
//! This module provides functional PCA, PLS, and ridge regression.

use crate::iter_maybe_parallel;
#[cfg(feature = "linalg")]
use anofox_regression::solvers::RidgeRegressor;
#[cfg(feature = "linalg")]
use anofox_regression::{FittedRegressor, Regressor};
use nalgebra::{DMatrix, SVD};
#[cfg(feature = "parallel")]
use rayon::iter::ParallelIterator;
13
/// Result of functional PCA (see [`fdata_to_pc_1d`]).
pub struct FpcaResult {
    /// Leading singular values of the centered data matrix, largest first.
    pub singular_values: Vec<f64>,
    /// Rotation matrix (loadings): right singular vectors, m x ncomp, column-major.
    pub rotation: Vec<f64>,
    /// Scores matrix (U * S), n x ncomp, column-major.
    pub scores: Vec<f64>,
    /// Pointwise mean function (length m).
    pub mean: Vec<f64>,
    /// Column-centered data, n x m, column-major.
    pub centered: Vec<f64>,
}
27
28/// Perform functional PCA via SVD on centered data.
29///
30/// # Arguments
31/// * `data` - Column-major matrix (n x m)
32/// * `n` - Number of observations
33/// * `m` - Number of evaluation points
34/// * `ncomp` - Number of components to extract
35pub fn fdata_to_pc_1d(data: &[f64], n: usize, m: usize, ncomp: usize) -> Option<FpcaResult> {
36    if n == 0 || m == 0 || ncomp < 1 || data.len() != n * m {
37        return None;
38    }
39
40    let ncomp = ncomp.min(n).min(m);
41
42    // Compute column means
43    let means: Vec<f64> = iter_maybe_parallel!(0..m)
44        .map(|j| {
45            let mut sum = 0.0;
46            for i in 0..n {
47                sum += data[i + j * n];
48            }
49            sum / n as f64
50        })
51        .collect();
52
53    // Center the data
54    let centered_data: Vec<f64> = (0..(n * m))
55        .map(|idx| {
56            let i = idx % n;
57            let j = idx / n;
58            data[i + j * n] - means[j]
59        })
60        .collect();
61
62    // Create nalgebra DMatrix
63    let matrix = DMatrix::from_column_slice(n, m, &centered_data);
64
65    // Compute SVD
66    let svd = SVD::new(matrix, true, true);
67
68    // Extract singular values
69    let singular_values: Vec<f64> = svd.singular_values.iter().take(ncomp).cloned().collect();
70
71    // Extract V (right singular vectors)
72    let v_t = svd.v_t.as_ref()?;
73    let rotation_data: Vec<f64> = (0..ncomp)
74        .flat_map(|k| (0..m).map(move |j| v_t[(k, j)]))
75        .collect();
76
77    // Compute scores: X_centered * V = U * S
78    let u = svd.u.as_ref()?;
79    let mut scores_data: Vec<f64> = Vec::with_capacity(n * ncomp);
80    for k in 0..ncomp {
81        let sv_k = singular_values[k];
82        for i in 0..n {
83            scores_data.push(u[(i, k)] * sv_k);
84        }
85    }
86
87    Some(FpcaResult {
88        singular_values,
89        rotation: rotation_data,
90        scores: scores_data,
91        mean: means,
92        centered: centered_data,
93    })
94}
95
/// Result of PLS regression (see [`fdata_to_pls_1d`]).
pub struct PlsResult {
    /// NIPALS weight vectors (unit norm unless numerically zero), m x ncomp, column-major.
    pub weights: Vec<f64>,
    /// Score vectors t = Xw, n x ncomp, column-major.
    pub scores: Vec<f64>,
    /// Loading vectors p = X't / (t't), m x ncomp, column-major.
    pub loadings: Vec<f64>,
}
105
/// Compute the PLS weight vector `w = X'y / ||X'y||`.
///
/// `x_cen` is the centered predictor matrix (column-major, n x m) and
/// `y_cen` the centered response (length n). When `||X'y||` is numerically
/// zero (<= 1e-10) the unnormalized, near-zero vector is returned as-is.
fn pls_compute_weights(x_cen: &[f64], y_cen: &[f64], n: usize, m: usize) -> Vec<f64> {
    // w_j = sum_i X[i, j] * y[i]
    let mut w: Vec<f64> = (0..m)
        .map(|j| (0..n).map(|i| x_cen[i + j * n] * y_cen[i]).sum())
        .collect();

    let norm = w.iter().map(|v| v * v).sum::<f64>().sqrt();
    if norm > 1e-10 {
        w.iter_mut().for_each(|v| *v /= norm);
    }
    w
}
126
/// Compute the PLS score vector `t = Xw` (length n).
///
/// `x_cen` is column-major (n x m); each score is the dot product of one
/// row of X with the weight vector `w`.
fn pls_compute_scores(x_cen: &[f64], w: &[f64], n: usize, m: usize) -> Vec<f64> {
    let mut t = vec![0.0; n];
    // Accumulate column by column; per element the addition order matches
    // a row-wise dot product over j = 0..m.
    for j in 0..m {
        let wj = w[j];
        for (i, ti) in t.iter_mut().enumerate() {
            *ti += x_cen[i + j * n] * wj;
        }
    }
    t
}
139
/// Compute the PLS loading vector `p = X't / (t't)`.
///
/// `t_norm_sq` is `t't`; it is floored at 1e-10 to avoid division by zero
/// when the score vector is degenerate.
fn pls_compute_loadings(x_cen: &[f64], t: &[f64], n: usize, m: usize, t_norm_sq: f64) -> Vec<f64> {
    let denom = t_norm_sq.max(1e-10);
    (0..m)
        .map(|j| {
            let dot: f64 = (0..n).map(|i| x_cen[i + j * n] * t[i]).sum();
            dot / denom
        })
        .collect()
}
152
/// Deflate X in place by subtracting the rank-1 component `t * p'`.
fn pls_deflate_x(x_cen: &mut [f64], t: &[f64], p: &[f64], n: usize, m: usize) {
    for j in 0..m {
        let pj = p[j];
        // Column j occupies the contiguous slice [j*n, (j+1)*n).
        let col = &mut x_cen[j * n..(j + 1) * n];
        for (xij, &ti) in col.iter_mut().zip(&t[..n]) {
            *xij -= ti * pj;
        }
    }
}
161
162/// Execute one NIPALS step: compute weights/scores/loadings and deflate X and y.
163fn pls_nipals_step(
164    k: usize,
165    x_cen: &mut [f64],
166    y_cen: &mut [f64],
167    n: usize,
168    m: usize,
169    weights: &mut [f64],
170    scores: &mut [f64],
171    loadings: &mut [f64],
172) {
173    let w = pls_compute_weights(x_cen, y_cen, n, m);
174    let t = pls_compute_scores(x_cen, &w, n, m);
175    let t_norm_sq: f64 = t.iter().map(|&ti| ti * ti).sum();
176    let p = pls_compute_loadings(x_cen, &t, n, m, t_norm_sq);
177
178    for j in 0..m {
179        weights[j + k * m] = w[j];
180        loadings[j + k * m] = p[j];
181    }
182    for i in 0..n {
183        scores[i + k * n] = t[i];
184    }
185
186    pls_deflate_x(x_cen, &t, &p, n, m);
187    let t_y: f64 = t.iter().zip(y_cen.iter()).map(|(&ti, &yi)| ti * yi).sum();
188    let q = t_y / t_norm_sq.max(1e-10);
189    for i in 0..n {
190        y_cen[i] -= t[i] * q;
191    }
192}
193
194/// Perform PLS via NIPALS algorithm.
195pub fn fdata_to_pls_1d(
196    data: &[f64],
197    n: usize,
198    m: usize,
199    y: &[f64],
200    ncomp: usize,
201) -> Option<PlsResult> {
202    if n == 0 || m == 0 || y.len() != n || ncomp < 1 || data.len() != n * m {
203        return None;
204    }
205
206    let ncomp = ncomp.min(n).min(m);
207
208    // Center X and y
209    let x_means: Vec<f64> = (0..m)
210        .map(|j| {
211            let mut sum = 0.0;
212            for i in 0..n {
213                sum += data[i + j * n];
214            }
215            sum / n as f64
216        })
217        .collect();
218
219    let y_mean: f64 = y.iter().sum::<f64>() / n as f64;
220
221    let mut x_cen: Vec<f64> = (0..(n * m))
222        .map(|idx| {
223            let i = idx % n;
224            let j = idx / n;
225            data[i + j * n] - x_means[j]
226        })
227        .collect();
228
229    let mut y_cen: Vec<f64> = y.iter().map(|&yi| yi - y_mean).collect();
230
231    let mut weights = vec![0.0; m * ncomp];
232    let mut scores = vec![0.0; n * ncomp];
233    let mut loadings = vec![0.0; m * ncomp];
234
235    // NIPALS algorithm
236    for k in 0..ncomp {
237        pls_nipals_step(
238            k,
239            &mut x_cen,
240            &mut y_cen,
241            n,
242            m,
243            &mut weights,
244            &mut scores,
245            &mut loadings,
246        );
247    }
248
249    Some(PlsResult {
250        weights,
251        scores,
252        loadings,
253    })
254}
255
/// Result of a ridge regression fit (see [`ridge_regression_fit`]).
#[cfg(feature = "linalg")]
pub struct RidgeResult {
    /// Estimated coefficients (length m; empty when the fit failed).
    pub coefficients: Vec<f64>,
    /// Intercept (0.0 when fitted without an intercept or on failure).
    pub intercept: f64,
    /// Fitted values, intercept + X * coefficients (length n; empty on failure).
    pub fitted_values: Vec<f64>,
    /// Residuals y - fitted (length n; empty on failure).
    pub residuals: Vec<f64>,
    /// Coefficient of determination, 1 - SS_res / SS_tot (0.0 when SS_tot == 0).
    pub r_squared: f64,
    /// Regularization parameter used for the fit.
    pub lambda: f64,
    /// Error message when the fit failed; `None` on success.
    pub error: Option<String>,
}
274
/// Build a `RidgeResult` describing a failed fit: empty vectors, zeroed
/// statistics, and the given error message.
#[cfg(feature = "linalg")]
fn ridge_error_result(lambda: f64, message: String) -> RidgeResult {
    RidgeResult {
        coefficients: Vec::new(),
        intercept: 0.0,
        fitted_values: Vec::new(),
        residuals: Vec::new(),
        r_squared: 0.0,
        lambda,
        error: Some(message),
    }
}

/// Fit ridge regression.
///
/// # Arguments
/// * `x` - Predictor matrix (column-major, n x m)
/// * `y` - Response vector (length n)
/// * `n` - Number of observations
/// * `m` - Number of predictors
/// * `lambda` - Regularization parameter
/// * `with_intercept` - Whether to include intercept
///
/// Never panics on bad input: invalid dimensions or a solver failure
/// produce a `RidgeResult` whose `error` field is populated.
#[cfg(feature = "linalg")]
pub fn ridge_regression_fit(
    x: &[f64],
    y: &[f64],
    n: usize,
    m: usize,
    lambda: f64,
    with_intercept: bool,
) -> RidgeResult {
    if n == 0 || m == 0 || y.len() != n || x.len() != n * m {
        return ridge_error_result(lambda, "Invalid input dimensions".to_string());
    }

    // Convert to the faer types expected by the solver.
    let x_faer = faer::Mat::from_fn(n, m, |i, j| x[i + j * n]);
    let y_faer = faer::Col::from_fn(n, |i| y[i]);

    // Build and fit the ridge regressor.
    let regressor = RidgeRegressor::builder()
        .with_intercept(with_intercept)
        .lambda(lambda)
        .build();

    let fitted = match regressor.fit(&x_faer, &y_faer) {
        Ok(f) => f,
        Err(e) => return ridge_error_result(lambda, format!("Fit failed: {:?}", e)),
    };

    // Extract coefficients; the intercept defaults to 0.0 when absent.
    let coefs = fitted.coefficients();
    let coefficients: Vec<f64> = (0..coefs.nrows()).map(|i| coefs[i]).collect();
    let intercept = fitted.intercept().unwrap_or(0.0);

    // Fitted values: intercept + X * coefficients, row by row.
    let mut fitted_values = vec![0.0; n];
    for i in 0..n {
        let mut pred = intercept;
        for j in 0..m {
            pred += x[i + j * n] * coefficients[j];
        }
        fitted_values[i] = pred;
    }

    // Residuals: y - fitted.
    let residuals: Vec<f64> = y
        .iter()
        .zip(fitted_values.iter())
        .map(|(&yi, &yhat)| yi - yhat)
        .collect();

    // R-squared: 1 - SS_res / SS_tot; defined as 0.0 for a constant response.
    let y_mean: f64 = y.iter().sum::<f64>() / n as f64;
    let ss_tot: f64 = y.iter().map(|&yi| (yi - y_mean).powi(2)).sum();
    let ss_res: f64 = residuals.iter().map(|&r| r.powi(2)).sum();
    let r_squared = if ss_tot > 0.0 {
        1.0 - ss_res / ss_tot
    } else {
        0.0
    };

    RidgeResult {
        coefficients,
        intercept,
        fitted_values,
        residuals,
        r_squared,
        lambda,
        error: None,
    }
}
374
#[cfg(test)]
mod tests {
    use super::*;
    use std::f64::consts::PI;

    /// Generate functional data with known structure for testing:
    /// n sine curves over [0, 1] with linearly varying phase.
    fn generate_test_fdata(n: usize, m: usize) -> (Vec<f64>, Vec<f64>) {
        let t: Vec<f64> = (0..m).map(|j| j as f64 / (m - 1) as f64).collect();

        // Create n curves: sine waves with varying phase
        let mut data = vec![0.0; n * m];
        for i in 0..n {
            let phase = (i as f64 / n as f64) * PI;
            for j in 0..m {
                data[i + j * n] = (2.0 * PI * t[j] + phase).sin();
            }
        }

        (data, t)
    }

    // ============== FPCA tests ==============

    #[test]
    fn test_fdata_to_pc_1d_basic() {
        let n = 20;
        let m = 50;
        let ncomp = 3;
        let (data, _) = generate_test_fdata(n, m);

        let result = fdata_to_pc_1d(&data, n, m, ncomp);
        assert!(result.is_some());

        let fpca = result.unwrap();
        assert_eq!(fpca.singular_values.len(), ncomp);
        assert_eq!(fpca.rotation.len(), m * ncomp);
        assert_eq!(fpca.scores.len(), n * ncomp);
        assert_eq!(fpca.mean.len(), m);
        assert_eq!(fpca.centered.len(), n * m);
    }

    #[test]
    fn test_fdata_to_pc_1d_singular_values_decreasing() {
        let n = 20;
        let m = 50;
        let ncomp = 5;
        let (data, _) = generate_test_fdata(n, m);

        let fpca = fdata_to_pc_1d(&data, n, m, ncomp).unwrap();

        // Singular values should be in decreasing order
        for i in 1..fpca.singular_values.len() {
            assert!(
                fpca.singular_values[i] <= fpca.singular_values[i - 1] + 1e-10,
                "Singular values should be decreasing"
            );
        }
    }

    #[test]
    fn test_fdata_to_pc_1d_centered_has_zero_mean() {
        let n = 20;
        let m = 50;
        let (data, _) = generate_test_fdata(n, m);

        let fpca = fdata_to_pc_1d(&data, n, m, 3).unwrap();

        // Column means of centered data should be zero
        for j in 0..m {
            let col_mean: f64 = (0..n).map(|i| fpca.centered[i + j * n]).sum::<f64>() / n as f64;
            assert!(
                col_mean.abs() < 1e-10,
                "Centered data should have zero column mean"
            );
        }
    }

    #[test]
    fn test_fdata_to_pc_1d_ncomp_limits() {
        let n = 10;
        let m = 50;
        let (data, _) = generate_test_fdata(n, m);

        // Request more components than n - should cap at n
        let fpca = fdata_to_pc_1d(&data, n, m, 20).unwrap();
        assert!(fpca.singular_values.len() <= n);
    }

    #[test]
    fn test_fdata_to_pc_1d_invalid_input() {
        // Empty data
        let result = fdata_to_pc_1d(&[], 0, 50, 3);
        assert!(result.is_none());

        // Wrong data length
        let data = vec![0.0; 100];
        let result = fdata_to_pc_1d(&data, 10, 20, 3); // Should be 10*20=200
        assert!(result.is_none());

        // Zero components
        let (data, _) = generate_test_fdata(10, 50);
        let result = fdata_to_pc_1d(&data, 10, 50, 0);
        assert!(result.is_none());
    }

    #[test]
    fn test_fdata_to_pc_1d_reconstruction() {
        let n = 10;
        let m = 30;
        let (data, _) = generate_test_fdata(n, m);

        // Use all components for perfect reconstruction
        let ncomp = n.min(m);
        let fpca = fdata_to_pc_1d(&data, n, m, ncomp).unwrap();

        // Reconstruct: X_centered = scores * rotation^T
        // But scores = U * S, rotation = V
        // So X_centered ≈ sum_k (score_k * loading_k)
        for i in 0..n {
            for j in 0..m {
                let mut reconstructed = 0.0;
                for k in 0..ncomp {
                    let score = fpca.scores[i + k * n];
                    let loading = fpca.rotation[j + k * m];
                    reconstructed += score * loading;
                }
                let original_centered = fpca.centered[i + j * n];
                assert!(
                    (reconstructed - original_centered).abs() < 0.1,
                    "Reconstruction error at ({}, {}): {} vs {}",
                    i,
                    j,
                    reconstructed,
                    original_centered
                );
            }
        }
    }

    // ============== PLS tests ==============

    #[test]
    fn test_fdata_to_pls_1d_basic() {
        let n = 20;
        let m = 30;
        let ncomp = 3;
        let (x, _) = generate_test_fdata(n, m);

        // Create y with some relationship to x
        let y: Vec<f64> = (0..n).map(|i| (i as f64 / n as f64) + 0.1).collect();

        let result = fdata_to_pls_1d(&x, n, m, &y, ncomp);
        assert!(result.is_some());

        let pls = result.unwrap();
        assert_eq!(pls.weights.len(), m * ncomp);
        assert_eq!(pls.scores.len(), n * ncomp);
        assert_eq!(pls.loadings.len(), m * ncomp);
    }

    #[test]
    fn test_fdata_to_pls_1d_weights_normalized() {
        let n = 20;
        let m = 30;
        let ncomp = 2;
        let (x, _) = generate_test_fdata(n, m);
        let y: Vec<f64> = (0..n).map(|i| i as f64).collect();

        let pls = fdata_to_pls_1d(&x, n, m, &y, ncomp).unwrap();

        // Weight vectors should be approximately unit norm
        for k in 0..ncomp {
            let norm: f64 = (0..m)
                .map(|j| pls.weights[j + k * m].powi(2))
                .sum::<f64>()
                .sqrt();
            assert!(
                (norm - 1.0).abs() < 0.1,
                "Weight vector {} should be unit norm, got {}",
                k,
                norm
            );
        }
    }

    #[test]
    fn test_fdata_to_pls_1d_invalid_input() {
        let (x, _) = generate_test_fdata(10, 30);
        let y = vec![0.0; 10];

        // Wrong y length
        let result = fdata_to_pls_1d(&x, 10, 30, &[0.0; 5], 2);
        assert!(result.is_none());

        // Zero components
        let result = fdata_to_pls_1d(&x, 10, 30, &y, 0);
        assert!(result.is_none());
    }

    // ============== Ridge regression tests ==============
    //
    // NOTE: `ridge_regression_fit` and `RidgeResult` only exist when the
    // `linalg` feature is enabled, so every test below must carry the same
    // cfg gate or the test build breaks without that feature.

    #[cfg(feature = "linalg")]
    #[test]
    fn test_ridge_regression_fit_basic() {
        let n = 50;
        let m = 5;

        // Create X with known structure
        let mut x = vec![0.0; n * m];
        for i in 0..n {
            for j in 0..m {
                x[i + j * n] = (i as f64 + j as f64) / (n + m) as f64;
            }
        }

        // Create y = sum of x columns + noise
        let y: Vec<f64> = (0..n)
            .map(|i| {
                let mut sum = 0.0;
                for j in 0..m {
                    sum += x[i + j * n];
                }
                sum + 0.01 * (i as f64 % 10.0)
            })
            .collect();

        let result = ridge_regression_fit(&x, &y, n, m, 0.1, true);

        assert!(result.error.is_none(), "Ridge should fit without error");
        assert_eq!(result.coefficients.len(), m);
        assert_eq!(result.fitted_values.len(), n);
        assert_eq!(result.residuals.len(), n);
    }

    #[cfg(feature = "linalg")]
    #[test]
    fn test_ridge_regression_fit_r_squared() {
        let n = 50;
        let m = 3;

        // Create perfect linear relationship
        let x: Vec<f64> = (0..n * m).map(|i| i as f64 / (n * m) as f64).collect();
        let y: Vec<f64> = (0..n).map(|i| i as f64 / n as f64).collect();

        let result = ridge_regression_fit(&x, &y, n, m, 0.01, true);

        // R-squared should be high for good fit
        assert!(
            result.r_squared > 0.5,
            "R-squared should be high, got {}",
            result.r_squared
        );
        assert!(result.r_squared <= 1.0 + 1e-10, "R-squared should be <= 1");
    }

    #[cfg(feature = "linalg")]
    #[test]
    fn test_ridge_regression_fit_regularization() {
        let n = 30;
        let m = 10;

        // Create X
        let x: Vec<f64> = (0..n * m)
            .map(|i| ((i * 17) % 100) as f64 / 100.0)
            .collect();
        let y: Vec<f64> = (0..n).map(|i| (i as f64).sin()).collect();

        let low_lambda = ridge_regression_fit(&x, &y, n, m, 0.001, true);
        let high_lambda = ridge_regression_fit(&x, &y, n, m, 100.0, true);

        // Higher lambda should give smaller coefficient norm
        let norm_low: f64 = low_lambda
            .coefficients
            .iter()
            .map(|c| c.powi(2))
            .sum::<f64>()
            .sqrt();
        let norm_high: f64 = high_lambda
            .coefficients
            .iter()
            .map(|c| c.powi(2))
            .sum::<f64>()
            .sqrt();

        assert!(
            norm_high <= norm_low + 1e-6,
            "Higher lambda should shrink coefficients: {} vs {}",
            norm_high,
            norm_low
        );
    }

    #[cfg(feature = "linalg")]
    #[test]
    fn test_ridge_regression_fit_residuals() {
        let n = 20;
        let m = 3;

        let x: Vec<f64> = (0..n * m).map(|i| i as f64 / (n * m) as f64).collect();
        let y: Vec<f64> = (0..n).map(|i| i as f64 / n as f64).collect();

        let result = ridge_regression_fit(&x, &y, n, m, 0.1, true);

        // Check residuals = y - fitted
        for i in 0..n {
            let expected_resid = y[i] - result.fitted_values[i];
            assert!(
                (result.residuals[i] - expected_resid).abs() < 1e-10,
                "Residual mismatch at {}",
                i
            );
        }
    }

    #[cfg(feature = "linalg")]
    #[test]
    fn test_ridge_regression_fit_no_intercept() {
        let n = 30;
        let m = 5;

        let x: Vec<f64> = (0..n * m).map(|i| i as f64 / (n * m) as f64).collect();
        let y: Vec<f64> = (0..n).map(|i| i as f64 / n as f64).collect();

        let result = ridge_regression_fit(&x, &y, n, m, 0.1, false);

        assert!(result.error.is_none());
        // Intercept should be 0 when not fitting
        assert!(
            result.intercept.abs() < 1e-10,
            "Intercept should be 0, got {}",
            result.intercept
        );
    }

    #[cfg(feature = "linalg")]
    #[test]
    fn test_ridge_regression_fit_invalid_input() {
        // Empty data
        let result = ridge_regression_fit(&[], &[], 0, 5, 0.1, true);
        assert!(result.error.is_some());

        // Mismatched dimensions
        let x = vec![0.0; 50];
        let y = vec![0.0; 10];
        let result = ridge_regression_fit(&x, &y, 10, 10, 0.1, true); // x should be 10*10=100
        assert!(result.error.is_some());
    }
}