// fdars_core/regression.rs

//! Regression functions for functional data.
//!
//! This module provides functional PCA, PLS, and ridge regression.

5use anofox_regression::solvers::RidgeRegressor;
6use anofox_regression::{FittedRegressor, Regressor};
7use nalgebra::{DMatrix, SVD};
8use rayon::prelude::*;
9
/// Output of a functional principal component analysis.
pub struct FpcaResult {
    /// Leading singular values, one per retained component.
    pub singular_values: Vec<f64>,
    /// Loadings (right singular vectors), column-major m x ncomp.
    pub rotation: Vec<f64>,
    /// Component scores (U * S), column-major n x ncomp.
    pub scores: Vec<f64>,
    /// Pointwise mean curve of length m.
    pub mean: Vec<f64>,
    /// Mean-centered data matrix, column-major n x m.
    pub centered: Vec<f64>,
}
23
24/// Perform functional PCA via SVD on centered data.
25///
26/// # Arguments
27/// * `data` - Column-major matrix (n x m)
28/// * `n` - Number of observations
29/// * `m` - Number of evaluation points
30/// * `ncomp` - Number of components to extract
31pub fn fdata_to_pc_1d(data: &[f64], n: usize, m: usize, ncomp: usize) -> Option<FpcaResult> {
32    if n == 0 || m == 0 || ncomp < 1 || data.len() != n * m {
33        return None;
34    }
35
36    let ncomp = ncomp.min(n).min(m);
37
38    // Compute column means
39    let means: Vec<f64> = (0..m)
40        .into_par_iter()
41        .map(|j| {
42            let mut sum = 0.0;
43            for i in 0..n {
44                sum += data[i + j * n];
45            }
46            sum / n as f64
47        })
48        .collect();
49
50    // Center the data
51    let centered_data: Vec<f64> = (0..(n * m))
52        .map(|idx| {
53            let i = idx % n;
54            let j = idx / n;
55            data[i + j * n] - means[j]
56        })
57        .collect();
58
59    // Create nalgebra DMatrix
60    let matrix = DMatrix::from_column_slice(n, m, &centered_data);
61
62    // Compute SVD
63    let svd = SVD::new(matrix, true, true);
64
65    // Extract singular values
66    let singular_values: Vec<f64> = svd.singular_values.iter().take(ncomp).cloned().collect();
67
68    // Extract V (right singular vectors)
69    let v_t = svd.v_t.as_ref()?;
70    let rotation_data: Vec<f64> = (0..ncomp)
71        .flat_map(|k| (0..m).map(move |j| v_t[(k, j)]))
72        .collect();
73
74    // Compute scores: X_centered * V = U * S
75    let u = svd.u.as_ref()?;
76    let mut scores_data: Vec<f64> = Vec::with_capacity(n * ncomp);
77    for k in 0..ncomp {
78        let sv_k = singular_values[k];
79        for i in 0..n {
80            scores_data.push(u[(i, k)] * sv_k);
81        }
82    }
83
84    Some(FpcaResult {
85        singular_values,
86        rotation: rotation_data,
87        scores: scores_data,
88        mean: means,
89        centered: centered_data,
90    })
91}
92
/// Result of PLS regression.
pub struct PlsResult {
    /// Weight vectors, m x ncomp, column-major
    pub weights: Vec<f64>,
    /// Score vectors, n x ncomp, column-major
    pub scores: Vec<f64>,
    /// Loading vectors, m x ncomp, column-major
    pub loadings: Vec<f64>,
}

/// Perform PLS via NIPALS algorithm.
///
/// # Arguments
/// * `data` - Column-major predictor matrix (n x m)
/// * `n` - Number of observations
/// * `m` - Number of evaluation points
/// * `y` - Response vector of length n
/// * `ncomp` - Number of components to extract (capped at min(n, m))
///
/// Returns `None` on dimensionally inconsistent input. If the predictor
/// matrix is numerically exhausted (rank deficient) before `ncomp`
/// components are extracted, the remaining output columns stay zero
/// instead of being filled with noise.
pub fn fdata_to_pls_1d(
    data: &[f64],
    n: usize,
    m: usize,
    y: &[f64],
    ncomp: usize,
) -> Option<PlsResult> {
    if n == 0 || m == 0 || y.len() != n || ncomp < 1 || data.len() != n * m {
        return None;
    }

    let ncomp = ncomp.min(n).min(m);

    // Center X columns and y.
    let x_means: Vec<f64> = (0..m)
        .map(|j| (0..n).map(|i| data[i + j * n]).sum::<f64>() / n as f64)
        .collect();
    let y_mean: f64 = y.iter().sum::<f64>() / n as f64;

    // Column-major index idx = i + j*n, so the column is idx / n.
    let mut x_cen: Vec<f64> = (0..(n * m))
        .map(|idx| data[idx] - x_means[idx / n])
        .collect();
    let mut y_cen: Vec<f64> = y.iter().map(|&yi| yi - y_mean).collect();

    let mut weights = vec![0.0; m * ncomp];
    let mut scores = vec![0.0; n * ncomp];
    let mut loadings = vec![0.0; m * ncomp];

    // NIPALS: extract one component at a time, deflating X and y after each.
    for k in 0..ncomp {
        // w = X'y, then normalize to unit length.
        let mut w: Vec<f64> = (0..m)
            .map(|j| (0..n).map(|i| x_cen[i + j * n] * y_cen[i]).sum())
            .collect();
        let w_norm: f64 = w.iter().map(|&wi| wi * wi).sum::<f64>().sqrt();
        if w_norm <= 1e-10 {
            // Fix over previous version: the residual X'y is numerically zero,
            // so X is exhausted (or uncorrelated with y). Continuing with an
            // unnormalized near-zero w only injects noise into the remaining
            // components; stop and leave them as zeros.
            break;
        }
        for wi in &mut w {
            *wi /= w_norm;
        }

        // t = Xw
        let t: Vec<f64> = (0..n)
            .map(|i| (0..m).map(|j| x_cen[i + j * n] * w[j]).sum())
            .collect();
        let t_norm_sq: f64 = t.iter().map(|&ti| ti * ti).sum();

        // p = X't / (t't); guard the division against a degenerate score.
        let p: Vec<f64> = (0..m)
            .map(|j| {
                let xt: f64 = (0..n).map(|i| x_cen[i + j * n] * t[i]).sum();
                xt / t_norm_sq.max(1e-10)
            })
            .collect();

        // Store component k (column k of each output matrix).
        weights[k * m..(k + 1) * m].copy_from_slice(&w);
        loadings[k * m..(k + 1) * m].copy_from_slice(&p);
        scores[k * n..(k + 1) * n].copy_from_slice(&t);

        // Deflate X: X <- X - t p'.
        for j in 0..m {
            for i in 0..n {
                x_cen[i + j * n] -= t[i] * p[j];
            }
        }

        // Deflate y: y <- y - q t, with q = t'y / t't.
        let t_y: f64 = t.iter().zip(&y_cen).map(|(&ti, &yi)| ti * yi).sum();
        let q = t_y / t_norm_sq.max(1e-10);
        for (yi, &ti) in y_cen.iter_mut().zip(&t) {
            *yi -= ti * q;
        }
    }

    Some(PlsResult {
        weights,
        scores,
        loadings,
    })
}
218
/// Output of a ridge regression fit.
///
/// On failure, `error` is `Some(..)` and the remaining fields are left
/// empty / zeroed; callers should check `error` before using the fit.
pub struct RidgeResult {
    /// Estimated coefficient vector (one entry per predictor).
    pub coefficients: Vec<f64>,
    /// Estimated intercept (0.0 when fit without one).
    pub intercept: f64,
    /// Predicted responses for the training data.
    pub fitted_values: Vec<f64>,
    /// Training residuals, y - fitted.
    pub residuals: Vec<f64>,
    /// Coefficient of determination of the fit.
    pub r_squared: f64,
    /// Regularization strength the model was fit with.
    pub lambda: f64,
    /// Failure description, or `None` on success.
    pub error: Option<String>,
}
236
237/// Fit ridge regression.
238///
239/// # Arguments
240/// * `x` - Predictor matrix (column-major, n x m)
241/// * `y` - Response vector
242/// * `n` - Number of observations
243/// * `m` - Number of predictors
244/// * `lambda` - Regularization parameter
245/// * `with_intercept` - Whether to include intercept
246pub fn ridge_regression_fit(
247    x: &[f64],
248    y: &[f64],
249    n: usize,
250    m: usize,
251    lambda: f64,
252    with_intercept: bool,
253) -> RidgeResult {
254    if n == 0 || m == 0 || y.len() != n || x.len() != n * m {
255        return RidgeResult {
256            coefficients: Vec::new(),
257            intercept: 0.0,
258            fitted_values: Vec::new(),
259            residuals: Vec::new(),
260            r_squared: 0.0,
261            lambda,
262            error: Some("Invalid input dimensions".to_string()),
263        };
264    }
265
266    // Convert to faer Mat format
267    let x_faer = faer::Mat::from_fn(n, m, |i, j| x[i + j * n]);
268    let y_faer = faer::Col::from_fn(n, |i| y[i]);
269
270    // Build and fit the ridge regressor
271    let regressor = RidgeRegressor::builder()
272        .with_intercept(with_intercept)
273        .lambda(lambda)
274        .build();
275
276    let fitted = match regressor.fit(&x_faer, &y_faer) {
277        Ok(f) => f,
278        Err(e) => {
279            return RidgeResult {
280                coefficients: Vec::new(),
281                intercept: 0.0,
282                fitted_values: Vec::new(),
283                residuals: Vec::new(),
284                r_squared: 0.0,
285                lambda,
286                error: Some(format!("Fit failed: {:?}", e)),
287            }
288        }
289    };
290
291    // Extract coefficients
292    let coefs = fitted.coefficients();
293    let coefficients: Vec<f64> = (0..coefs.nrows()).map(|i| coefs[i]).collect();
294
295    // Get intercept
296    let intercept = fitted.intercept().unwrap_or(0.0);
297
298    // Compute fitted values
299    let mut fitted_values = vec![0.0; n];
300    for i in 0..n {
301        let mut pred = intercept;
302        for j in 0..m {
303            pred += x[i + j * n] * coefficients[j];
304        }
305        fitted_values[i] = pred;
306    }
307
308    // Compute residuals
309    let residuals: Vec<f64> = y
310        .iter()
311        .zip(fitted_values.iter())
312        .map(|(&yi, &yhat)| yi - yhat)
313        .collect();
314
315    // Compute R-squared
316    let y_mean: f64 = y.iter().sum::<f64>() / n as f64;
317    let ss_tot: f64 = y.iter().map(|&yi| (yi - y_mean).powi(2)).sum();
318    let ss_res: f64 = residuals.iter().map(|&r| r.powi(2)).sum();
319    let r_squared = if ss_tot > 0.0 {
320        1.0 - ss_res / ss_tot
321    } else {
322        0.0
323    };
324
325    RidgeResult {
326        coefficients,
327        intercept,
328        fitted_values,
329        residuals,
330        r_squared,
331        lambda,
332        error: None,
333    }
334}
335
#[cfg(test)]
mod tests {
    use super::*;
    use std::f64::consts::PI;

    /// Generate functional data with known structure for testing.
    ///
    /// Returns `(data, t)` where `data` is a column-major n x m matrix of
    /// phase-shifted sine curves and `t` is the evaluation grid on [0, 1].
    /// NOTE(review): assumes m >= 2 — with m == 1 the grid is 0/0 = NaN.
    fn generate_test_fdata(n: usize, m: usize) -> (Vec<f64>, Vec<f64>) {
        let t: Vec<f64> = (0..m).map(|j| j as f64 / (m - 1) as f64).collect();

        // Create n curves: sine waves with varying phase
        let mut data = vec![0.0; n * m];
        for i in 0..n {
            let phase = (i as f64 / n as f64) * PI;
            for j in 0..m {
                data[i + j * n] = (2.0 * PI * t[j] + phase).sin();
            }
        }

        (data, t)
    }

    // ============== FPCA tests ==============

    // Output dimensions must match the requested component count.
    #[test]
    fn test_fdata_to_pc_1d_basic() {
        let n = 20;
        let m = 50;
        let ncomp = 3;
        let (data, _) = generate_test_fdata(n, m);

        let result = fdata_to_pc_1d(&data, n, m, ncomp);
        assert!(result.is_some());

        let fpca = result.unwrap();
        assert_eq!(fpca.singular_values.len(), ncomp);
        assert_eq!(fpca.rotation.len(), m * ncomp);
        assert_eq!(fpca.scores.len(), n * ncomp);
        assert_eq!(fpca.mean.len(), m);
        assert_eq!(fpca.centered.len(), n * m);
    }

    #[test]
    fn test_fdata_to_pc_1d_singular_values_decreasing() {
        let n = 20;
        let m = 50;
        let ncomp = 5;
        let (data, _) = generate_test_fdata(n, m);

        let fpca = fdata_to_pc_1d(&data, n, m, ncomp).unwrap();

        // Singular values should be in decreasing order
        // (the 1e-10 slack tolerates floating-point ties).
        for i in 1..fpca.singular_values.len() {
            assert!(
                fpca.singular_values[i] <= fpca.singular_values[i - 1] + 1e-10,
                "Singular values should be decreasing"
            );
        }
    }

    #[test]
    fn test_fdata_to_pc_1d_centered_has_zero_mean() {
        let n = 20;
        let m = 50;
        let (data, _) = generate_test_fdata(n, m);

        let fpca = fdata_to_pc_1d(&data, n, m, 3).unwrap();

        // Column means of centered data should be zero
        for j in 0..m {
            let col_mean: f64 = (0..n).map(|i| fpca.centered[i + j * n]).sum::<f64>() / n as f64;
            assert!(
                col_mean.abs() < 1e-10,
                "Centered data should have zero column mean"
            );
        }
    }

    #[test]
    fn test_fdata_to_pc_1d_ncomp_limits() {
        let n = 10;
        let m = 50;
        let (data, _) = generate_test_fdata(n, m);

        // Request more components than n - should cap at n
        let fpca = fdata_to_pc_1d(&data, n, m, 20).unwrap();
        assert!(fpca.singular_values.len() <= n);
    }

    #[test]
    fn test_fdata_to_pc_1d_invalid_input() {
        // Empty data
        let result = fdata_to_pc_1d(&[], 0, 50, 3);
        assert!(result.is_none());

        // Wrong data length
        let data = vec![0.0; 100];
        let result = fdata_to_pc_1d(&data, 10, 20, 3); // Should be 10*20=200
        assert!(result.is_none());

        // Zero components
        let (data, _) = generate_test_fdata(10, 50);
        let result = fdata_to_pc_1d(&data, 10, 50, 0);
        assert!(result.is_none());
    }

    #[test]
    fn test_fdata_to_pc_1d_reconstruction() {
        let n = 10;
        let m = 30;
        let (data, _) = generate_test_fdata(n, m);

        // Use all components for perfect reconstruction
        let ncomp = n.min(m);
        let fpca = fdata_to_pc_1d(&data, n, m, ncomp).unwrap();

        // Reconstruct: X_centered = scores * rotation^T
        // But scores = U * S, rotation = V
        // So X_centered ≈ sum_k (score_k * loading_k)
        for i in 0..n {
            for j in 0..m {
                let mut reconstructed = 0.0;
                for k in 0..ncomp {
                    let score = fpca.scores[i + k * n];
                    let loading = fpca.rotation[j + k * m];
                    reconstructed += score * loading;
                }
                let original_centered = fpca.centered[i + j * n];
                assert!(
                    (reconstructed - original_centered).abs() < 0.1,
                    "Reconstruction error at ({}, {}): {} vs {}",
                    i,
                    j,
                    reconstructed,
                    original_centered
                );
            }
        }
    }

    // ============== PLS tests ==============

    // Output dimensions must match the requested component count.
    #[test]
    fn test_fdata_to_pls_1d_basic() {
        let n = 20;
        let m = 30;
        let ncomp = 3;
        let (x, _) = generate_test_fdata(n, m);

        // Create y with some relationship to x
        let y: Vec<f64> = (0..n).map(|i| (i as f64 / n as f64) + 0.1).collect();

        let result = fdata_to_pls_1d(&x, n, m, &y, ncomp);
        assert!(result.is_some());

        let pls = result.unwrap();
        assert_eq!(pls.weights.len(), m * ncomp);
        assert_eq!(pls.scores.len(), n * ncomp);
        assert_eq!(pls.loadings.len(), m * ncomp);
    }

    #[test]
    fn test_fdata_to_pls_1d_weights_normalized() {
        let n = 20;
        let m = 30;
        let ncomp = 2;
        let (x, _) = generate_test_fdata(n, m);
        let y: Vec<f64> = (0..n).map(|i| i as f64).collect();

        let pls = fdata_to_pls_1d(&x, n, m, &y, ncomp).unwrap();

        // Weight vectors should be approximately unit norm
        for k in 0..ncomp {
            let norm: f64 = (0..m)
                .map(|j| pls.weights[j + k * m].powi(2))
                .sum::<f64>()
                .sqrt();
            assert!(
                (norm - 1.0).abs() < 0.1,
                "Weight vector {} should be unit norm, got {}",
                k,
                norm
            );
        }
    }

    #[test]
    fn test_fdata_to_pls_1d_invalid_input() {
        let (x, _) = generate_test_fdata(10, 30);
        let y = vec![0.0; 10];

        // Wrong y length
        let result = fdata_to_pls_1d(&x, 10, 30, &[0.0; 5], 2);
        assert!(result.is_none());

        // Zero components
        let result = fdata_to_pls_1d(&x, 10, 30, &y, 0);
        assert!(result.is_none());
    }

    // ============== Ridge regression tests ==============

    #[test]
    fn test_ridge_regression_fit_basic() {
        let n = 50;
        let m = 5;

        // Create X with known structure
        let mut x = vec![0.0; n * m];
        for i in 0..n {
            for j in 0..m {
                x[i + j * n] = (i as f64 + j as f64) / (n + m) as f64;
            }
        }

        // Create y = sum of x columns + noise
        let y: Vec<f64> = (0..n)
            .map(|i| {
                let mut sum = 0.0;
                for j in 0..m {
                    sum += x[i + j * n];
                }
                sum + 0.01 * (i as f64 % 10.0)
            })
            .collect();

        let result = ridge_regression_fit(&x, &y, n, m, 0.1, true);

        assert!(result.error.is_none(), "Ridge should fit without error");
        assert_eq!(result.coefficients.len(), m);
        assert_eq!(result.fitted_values.len(), n);
        assert_eq!(result.residuals.len(), n);
    }

    #[test]
    fn test_ridge_regression_fit_r_squared() {
        let n = 50;
        let m = 3;

        // Create perfect linear relationship
        let x: Vec<f64> = (0..n * m).map(|i| i as f64 / (n * m) as f64).collect();
        let y: Vec<f64> = (0..n).map(|i| i as f64 / n as f64).collect();

        let result = ridge_regression_fit(&x, &y, n, m, 0.01, true);

        // R-squared should be high for good fit
        assert!(
            result.r_squared > 0.5,
            "R-squared should be high, got {}",
            result.r_squared
        );
        assert!(result.r_squared <= 1.0 + 1e-10, "R-squared should be <= 1");
    }

    #[test]
    fn test_ridge_regression_fit_regularization() {
        let n = 30;
        let m = 10;

        // Create X (deterministic pseudo-random values via modular arithmetic)
        let x: Vec<f64> = (0..n * m)
            .map(|i| ((i * 17) % 100) as f64 / 100.0)
            .collect();
        let y: Vec<f64> = (0..n).map(|i| (i as f64).sin()).collect();

        let low_lambda = ridge_regression_fit(&x, &y, n, m, 0.001, true);
        let high_lambda = ridge_regression_fit(&x, &y, n, m, 100.0, true);

        // Higher lambda should give smaller coefficient norm
        let norm_low: f64 = low_lambda
            .coefficients
            .iter()
            .map(|c| c.powi(2))
            .sum::<f64>()
            .sqrt();
        let norm_high: f64 = high_lambda
            .coefficients
            .iter()
            .map(|c| c.powi(2))
            .sum::<f64>()
            .sqrt();

        assert!(
            norm_high <= norm_low + 1e-6,
            "Higher lambda should shrink coefficients: {} vs {}",
            norm_high,
            norm_low
        );
    }

    #[test]
    fn test_ridge_regression_fit_residuals() {
        let n = 20;
        let m = 3;

        let x: Vec<f64> = (0..n * m).map(|i| i as f64 / (n * m) as f64).collect();
        let y: Vec<f64> = (0..n).map(|i| i as f64 / n as f64).collect();

        let result = ridge_regression_fit(&x, &y, n, m, 0.1, true);

        // Check residuals = y - fitted
        for i in 0..n {
            let expected_resid = y[i] - result.fitted_values[i];
            assert!(
                (result.residuals[i] - expected_resid).abs() < 1e-10,
                "Residual mismatch at {}",
                i
            );
        }
    }

    #[test]
    fn test_ridge_regression_fit_no_intercept() {
        let n = 30;
        let m = 5;

        let x: Vec<f64> = (0..n * m).map(|i| i as f64 / (n * m) as f64).collect();
        let y: Vec<f64> = (0..n).map(|i| i as f64 / n as f64).collect();

        let result = ridge_regression_fit(&x, &y, n, m, 0.1, false);

        assert!(result.error.is_none());
        // Intercept should be 0 when not fitting
        assert!(
            result.intercept.abs() < 1e-10,
            "Intercept should be 0, got {}",
            result.intercept
        );
    }

    #[test]
    fn test_ridge_regression_fit_invalid_input() {
        // Empty data
        let result = ridge_regression_fit(&[], &[], 0, 5, 0.1, true);
        assert!(result.error.is_some());

        // Mismatched dimensions
        let x = vec![0.0; 50];
        let y = vec![0.0; 10];
        let result = ridge_regression_fit(&x, &y, 10, 10, 0.1, true); // x should be 10*10=100
        assert!(result.error.is_some());
    }
}