fdars_core/
regression.rs

1//! Regression functions for functional data.
2//!
3//! This module provides functional PCA, PLS, and ridge regression.
4
5use crate::iter_maybe_parallel;
6#[cfg(feature = "linalg")]
7use anofox_regression::solvers::RidgeRegressor;
8#[cfg(feature = "linalg")]
9use anofox_regression::{FittedRegressor, Regressor};
10use nalgebra::{DMatrix, SVD};
11#[cfg(feature = "parallel")]
12use rayon::iter::ParallelIterator;
13
/// Result of functional PCA.
pub struct FpcaResult {
    /// Singular values of the centered data matrix, one per extracted
    /// component, in decreasing order (as returned by the SVD)
    pub singular_values: Vec<f64>,
    /// Rotation matrix (loadings), m x ncomp, column-major; column k holds
    /// the k-th right singular vector of the centered data
    pub rotation: Vec<f64>,
    /// Scores matrix (U * S), n x ncomp, column-major
    pub scores: Vec<f64>,
    /// Mean function: column means of the input, length m
    pub mean: Vec<f64>,
    /// Centered data (input minus column means), n x m, column-major
    pub centered: Vec<f64>,
}
27
28/// Perform functional PCA via SVD on centered data.
29///
30/// # Arguments
31/// * `data` - Column-major matrix (n x m)
32/// * `n` - Number of observations
33/// * `m` - Number of evaluation points
34/// * `ncomp` - Number of components to extract
35pub fn fdata_to_pc_1d(data: &[f64], n: usize, m: usize, ncomp: usize) -> Option<FpcaResult> {
36    if n == 0 || m == 0 || ncomp < 1 || data.len() != n * m {
37        return None;
38    }
39
40    let ncomp = ncomp.min(n).min(m);
41
42    // Compute column means
43    let means: Vec<f64> = iter_maybe_parallel!(0..m)
44        .map(|j| {
45            let mut sum = 0.0;
46            for i in 0..n {
47                sum += data[i + j * n];
48            }
49            sum / n as f64
50        })
51        .collect();
52
53    // Center the data
54    let centered_data: Vec<f64> = (0..(n * m))
55        .map(|idx| {
56            let i = idx % n;
57            let j = idx / n;
58            data[i + j * n] - means[j]
59        })
60        .collect();
61
62    // Create nalgebra DMatrix
63    let matrix = DMatrix::from_column_slice(n, m, &centered_data);
64
65    // Compute SVD
66    let svd = SVD::new(matrix, true, true);
67
68    // Extract singular values
69    let singular_values: Vec<f64> = svd.singular_values.iter().take(ncomp).cloned().collect();
70
71    // Extract V (right singular vectors)
72    let v_t = svd.v_t.as_ref()?;
73    let rotation_data: Vec<f64> = (0..ncomp)
74        .flat_map(|k| (0..m).map(move |j| v_t[(k, j)]))
75        .collect();
76
77    // Compute scores: X_centered * V = U * S
78    let u = svd.u.as_ref()?;
79    let mut scores_data: Vec<f64> = Vec::with_capacity(n * ncomp);
80    for k in 0..ncomp {
81        let sv_k = singular_values[k];
82        for i in 0..n {
83            scores_data.push(u[(i, k)] * sv_k);
84        }
85    }
86
87    Some(FpcaResult {
88        singular_values,
89        rotation: rotation_data,
90        scores: scores_data,
91        mean: means,
92        centered: centered_data,
93    })
94}
95
/// Result of PLS regression.
pub struct PlsResult {
    /// Weight vectors (w = X'y, normalized), m x ncomp, column-major
    pub weights: Vec<f64>,
    /// Score vectors (t = X w), n x ncomp, column-major
    pub scores: Vec<f64>,
    /// Loading vectors (p = X't / t't), m x ncomp, column-major
    pub loadings: Vec<f64>,
}

/// Perform PLS via NIPALS algorithm.
///
/// Centers `data` (column-major, n x m) and `y`, then extracts up to
/// `ncomp` components: weight w = X'y (unit-normalized), score t = Xw,
/// loading p = X't / (t't), deflating X and y after each component.
///
/// Returns `None` when dimensions are empty or inconsistent, or `ncomp < 1`.
pub fn fdata_to_pls_1d(
    data: &[f64],
    n: usize,
    m: usize,
    y: &[f64],
    ncomp: usize,
) -> Option<PlsResult> {
    if n == 0 || m == 0 || y.len() != n || ncomp < 1 || data.len() != n * m {
        return None;
    }

    // Component count cannot exceed either matrix dimension.
    let ncomp = ncomp.min(n).min(m);

    // Column means of X and the mean of y, for centering.
    let x_means: Vec<f64> = (0..m)
        .map(|j| (0..n).map(|i| data[i + j * n]).sum::<f64>() / n as f64)
        .collect();
    let y_mean = y.iter().sum::<f64>() / n as f64;

    // Centered working copies; both get deflated in place per component.
    let mut x_cen: Vec<f64> = (0..m)
        .flat_map(|j| {
            let mu = x_means[j];
            (0..n).map(move |i| data[i + j * n] - mu)
        })
        .collect();
    let mut y_cen: Vec<f64> = y.iter().map(|&yi| yi - y_mean).collect();

    let mut weights = vec![0.0; m * ncomp];
    let mut scores = vec![0.0; n * ncomp];
    let mut loadings = vec![0.0; m * ncomp];

    // NIPALS iterations, one component per pass.
    for k in 0..ncomp {
        // Weight vector w = X'y, scaled to unit norm when non-degenerate.
        let mut w: Vec<f64> = (0..m)
            .map(|j| (0..n).map(|i| x_cen[i + j * n] * y_cen[i]).sum())
            .collect();
        let w_norm = w.iter().map(|&wi| wi * wi).sum::<f64>().sqrt();
        if w_norm > 1e-10 {
            w.iter_mut().for_each(|wi| *wi /= w_norm);
        }

        // Score vector t = Xw.
        let t: Vec<f64> = (0..n)
            .map(|i| (0..m).map(|j| x_cen[i + j * n] * w[j]).sum())
            .collect();
        let t_norm_sq: f64 = t.iter().map(|&ti| ti * ti).sum();

        // Loading vector p = X't / (t't); denominator clamped away from zero.
        let p: Vec<f64> = (0..m)
            .map(|j| {
                (0..n).map(|i| x_cen[i + j * n] * t[i]).sum::<f64>() / t_norm_sq.max(1e-10)
            })
            .collect();

        // Record component k into the column-major output buffers.
        weights[k * m..(k + 1) * m].copy_from_slice(&w);
        loadings[k * m..(k + 1) * m].copy_from_slice(&p);
        scores[k * n..(k + 1) * n].copy_from_slice(&t);

        // Deflate X: remove the rank-one contribution t p'.
        for (j, &pj) in p.iter().enumerate() {
            for (i, &ti) in t.iter().enumerate() {
                x_cen[i + j * n] -= ti * pj;
            }
        }

        // Deflate y by its projection onto t.
        let t_y: f64 = t.iter().zip(y_cen.iter()).map(|(&ti, &yi)| ti * yi).sum();
        let q = t_y / t_norm_sq.max(1e-10);
        y_cen.iter_mut().zip(t.iter()).for_each(|(yi, &ti)| *yi -= ti * q);
    }

    Some(PlsResult {
        weights,
        scores,
        loadings,
    })
}
221
/// Result of ridge regression fit.
#[cfg(feature = "linalg")]
pub struct RidgeResult {
    /// Coefficients, one per predictor column; empty when the fit failed
    pub coefficients: Vec<f64>,
    /// Intercept (0.0 when fitted without an intercept or on failure)
    pub intercept: f64,
    /// Fitted values (intercept + X * coefficients), length n
    pub fitted_values: Vec<f64>,
    /// Residuals (y - fitted), length n
    pub residuals: Vec<f64>,
    /// R-squared (0.0 when the total sum of squares is zero)
    pub r_squared: f64,
    /// Lambda used
    pub lambda: f64,
    /// Error message if any; `None` indicates a successful fit
    pub error: Option<String>,
}
240
/// Fit ridge regression.
///
/// # Arguments
/// * `x` - Predictor matrix (column-major, n x m)
/// * `y` - Response vector
/// * `n` - Number of observations
/// * `m` - Number of predictors
/// * `lambda` - Regularization parameter
/// * `with_intercept` - Whether to include intercept
///
/// On failure the returned `RidgeResult` carries the message in `error`
/// and empty coefficient/fitted/residual vectors.
#[cfg(feature = "linalg")]
pub fn ridge_regression_fit(
    x: &[f64],
    y: &[f64],
    n: usize,
    m: usize,
    lambda: f64,
    with_intercept: bool,
) -> RidgeResult {
    // Single construction point for every failure path — the original
    // duplicated this empty-result literal in two places.
    let fail = |msg: String| RidgeResult {
        coefficients: Vec::new(),
        intercept: 0.0,
        fitted_values: Vec::new(),
        residuals: Vec::new(),
        r_squared: 0.0,
        lambda,
        error: Some(msg),
    };

    if n == 0 || m == 0 || y.len() != n || x.len() != n * m {
        return fail("Invalid input dimensions".to_string());
    }

    // Convert the column-major slices to faer types.
    let x_faer = faer::Mat::from_fn(n, m, |i, j| x[i + j * n]);
    let y_faer = faer::Col::from_fn(n, |i| y[i]);

    // Build and fit the ridge regressor.
    let regressor = RidgeRegressor::builder()
        .with_intercept(with_intercept)
        .lambda(lambda)
        .build();

    let fitted = match regressor.fit(&x_faer, &y_faer) {
        Ok(f) => f,
        Err(e) => return fail(format!("Fit failed: {:?}", e)),
    };

    // Extract coefficients into a plain Vec.
    let coefs = fitted.coefficients();
    let coefficients: Vec<f64> = (0..coefs.nrows()).map(|i| coefs[i]).collect();

    // `intercept()` yields None when fitting without an intercept; report 0.0.
    let intercept = fitted.intercept().unwrap_or(0.0);

    // Fitted values: intercept + X * beta.
    let fitted_values: Vec<f64> = (0..n)
        .map(|i| intercept + (0..m).map(|j| x[i + j * n] * coefficients[j]).sum::<f64>())
        .collect();

    // Residuals: y - fitted.
    let residuals: Vec<f64> = y
        .iter()
        .zip(fitted_values.iter())
        .map(|(&yi, &yhat)| yi - yhat)
        .collect();

    // R^2 = 1 - SS_res / SS_tot; a constant response (SS_tot == 0) reports 0.0.
    let y_mean: f64 = y.iter().sum::<f64>() / n as f64;
    let ss_tot: f64 = y.iter().map(|&yi| (yi - y_mean).powi(2)).sum();
    let ss_res: f64 = residuals.iter().map(|&r| r.powi(2)).sum();
    let r_squared = if ss_tot > 0.0 { 1.0 - ss_res / ss_tot } else { 0.0 };

    RidgeResult {
        coefficients,
        intercept,
        fitted_values,
        residuals,
        r_squared,
        lambda,
        error: None,
    }
}
340
#[cfg(test)]
mod tests {
    use super::*;
    use std::f64::consts::PI;

    /// Generate functional data with known structure for testing:
    /// n sine curves over [0, 1] with linearly varying phase.
    fn generate_test_fdata(n: usize, m: usize) -> (Vec<f64>, Vec<f64>) {
        let t: Vec<f64> = (0..m).map(|j| j as f64 / (m - 1) as f64).collect();

        // Create n curves: sine waves with varying phase
        let mut data = vec![0.0; n * m];
        for i in 0..n {
            let phase = (i as f64 / n as f64) * PI;
            for j in 0..m {
                data[i + j * n] = (2.0 * PI * t[j] + phase).sin();
            }
        }

        (data, t)
    }

    // ============== FPCA tests ==============

    #[test]
    fn test_fdata_to_pc_1d_basic() {
        let n = 20;
        let m = 50;
        let ncomp = 3;
        let (data, _) = generate_test_fdata(n, m);

        let result = fdata_to_pc_1d(&data, n, m, ncomp);
        assert!(result.is_some());

        let fpca = result.unwrap();
        assert_eq!(fpca.singular_values.len(), ncomp);
        assert_eq!(fpca.rotation.len(), m * ncomp);
        assert_eq!(fpca.scores.len(), n * ncomp);
        assert_eq!(fpca.mean.len(), m);
        assert_eq!(fpca.centered.len(), n * m);
    }

    #[test]
    fn test_fdata_to_pc_1d_singular_values_decreasing() {
        let n = 20;
        let m = 50;
        let ncomp = 5;
        let (data, _) = generate_test_fdata(n, m);

        let fpca = fdata_to_pc_1d(&data, n, m, ncomp).unwrap();

        // Singular values should be in decreasing order
        for i in 1..fpca.singular_values.len() {
            assert!(
                fpca.singular_values[i] <= fpca.singular_values[i - 1] + 1e-10,
                "Singular values should be decreasing"
            );
        }
    }

    #[test]
    fn test_fdata_to_pc_1d_centered_has_zero_mean() {
        let n = 20;
        let m = 50;
        let (data, _) = generate_test_fdata(n, m);

        let fpca = fdata_to_pc_1d(&data, n, m, 3).unwrap();

        // Column means of centered data should be zero
        for j in 0..m {
            let col_mean: f64 = (0..n).map(|i| fpca.centered[i + j * n]).sum::<f64>() / n as f64;
            assert!(
                col_mean.abs() < 1e-10,
                "Centered data should have zero column mean"
            );
        }
    }

    #[test]
    fn test_fdata_to_pc_1d_ncomp_limits() {
        let n = 10;
        let m = 50;
        let (data, _) = generate_test_fdata(n, m);

        // Request more components than n - should cap at n
        let fpca = fdata_to_pc_1d(&data, n, m, 20).unwrap();
        assert!(fpca.singular_values.len() <= n);
    }

    #[test]
    fn test_fdata_to_pc_1d_invalid_input() {
        // Empty data
        let result = fdata_to_pc_1d(&[], 0, 50, 3);
        assert!(result.is_none());

        // Wrong data length
        let data = vec![0.0; 100];
        let result = fdata_to_pc_1d(&data, 10, 20, 3); // Should be 10*20=200
        assert!(result.is_none());

        // Zero components
        let (data, _) = generate_test_fdata(10, 50);
        let result = fdata_to_pc_1d(&data, 10, 50, 0);
        assert!(result.is_none());
    }

    #[test]
    fn test_fdata_to_pc_1d_reconstruction() {
        let n = 10;
        let m = 30;
        let (data, _) = generate_test_fdata(n, m);

        // Use all components for perfect reconstruction
        let ncomp = n.min(m);
        let fpca = fdata_to_pc_1d(&data, n, m, ncomp).unwrap();

        // Reconstruct: X_centered = scores * rotation^T
        // But scores = U * S, rotation = V
        // So X_centered ≈ sum_k (score_k * loading_k)
        for i in 0..n {
            for j in 0..m {
                let mut reconstructed = 0.0;
                for k in 0..ncomp {
                    let score = fpca.scores[i + k * n];
                    let loading = fpca.rotation[j + k * m];
                    reconstructed += score * loading;
                }
                let original_centered = fpca.centered[i + j * n];
                assert!(
                    (reconstructed - original_centered).abs() < 0.1,
                    "Reconstruction error at ({}, {}): {} vs {}",
                    i,
                    j,
                    reconstructed,
                    original_centered
                );
            }
        }
    }

    // ============== PLS tests ==============

    #[test]
    fn test_fdata_to_pls_1d_basic() {
        let n = 20;
        let m = 30;
        let ncomp = 3;
        let (x, _) = generate_test_fdata(n, m);

        // Create y with some relationship to x
        let y: Vec<f64> = (0..n).map(|i| (i as f64 / n as f64) + 0.1).collect();

        let result = fdata_to_pls_1d(&x, n, m, &y, ncomp);
        assert!(result.is_some());

        let pls = result.unwrap();
        assert_eq!(pls.weights.len(), m * ncomp);
        assert_eq!(pls.scores.len(), n * ncomp);
        assert_eq!(pls.loadings.len(), m * ncomp);
    }

    #[test]
    fn test_fdata_to_pls_1d_weights_normalized() {
        let n = 20;
        let m = 30;
        let ncomp = 2;
        let (x, _) = generate_test_fdata(n, m);
        let y: Vec<f64> = (0..n).map(|i| i as f64).collect();

        let pls = fdata_to_pls_1d(&x, n, m, &y, ncomp).unwrap();

        // Weight vectors should be approximately unit norm
        for k in 0..ncomp {
            let norm: f64 = (0..m)
                .map(|j| pls.weights[j + k * m].powi(2))
                .sum::<f64>()
                .sqrt();
            assert!(
                (norm - 1.0).abs() < 0.1,
                "Weight vector {} should be unit norm, got {}",
                k,
                norm
            );
        }
    }

    #[test]
    fn test_fdata_to_pls_1d_invalid_input() {
        let (x, _) = generate_test_fdata(10, 30);
        let y = vec![0.0; 10];

        // Wrong y length
        let result = fdata_to_pls_1d(&x, 10, 30, &[0.0; 5], 2);
        assert!(result.is_none());

        // Zero components
        let result = fdata_to_pls_1d(&x, 10, 30, &y, 0);
        assert!(result.is_none());
    }

    // ============== Ridge regression tests ==============
    //
    // `ridge_regression_fit` only exists when the `linalg` feature is
    // enabled, so these tests must be gated the same way; otherwise
    // `cargo test` without the feature fails to compile.

    #[cfg(feature = "linalg")]
    #[test]
    fn test_ridge_regression_fit_basic() {
        let n = 50;
        let m = 5;

        // Create X with known structure
        let mut x = vec![0.0; n * m];
        for i in 0..n {
            for j in 0..m {
                x[i + j * n] = (i as f64 + j as f64) / (n + m) as f64;
            }
        }

        // Create y = sum of x columns + noise
        let y: Vec<f64> = (0..n)
            .map(|i| {
                let mut sum = 0.0;
                for j in 0..m {
                    sum += x[i + j * n];
                }
                sum + 0.01 * (i as f64 % 10.0)
            })
            .collect();

        let result = ridge_regression_fit(&x, &y, n, m, 0.1, true);

        assert!(result.error.is_none(), "Ridge should fit without error");
        assert_eq!(result.coefficients.len(), m);
        assert_eq!(result.fitted_values.len(), n);
        assert_eq!(result.residuals.len(), n);
    }

    #[cfg(feature = "linalg")]
    #[test]
    fn test_ridge_regression_fit_r_squared() {
        let n = 50;
        let m = 3;

        // Create perfect linear relationship
        let x: Vec<f64> = (0..n * m).map(|i| i as f64 / (n * m) as f64).collect();
        let y: Vec<f64> = (0..n).map(|i| i as f64 / n as f64).collect();

        let result = ridge_regression_fit(&x, &y, n, m, 0.01, true);

        // R-squared should be high for good fit
        assert!(
            result.r_squared > 0.5,
            "R-squared should be high, got {}",
            result.r_squared
        );
        assert!(result.r_squared <= 1.0 + 1e-10, "R-squared should be <= 1");
    }

    #[cfg(feature = "linalg")]
    #[test]
    fn test_ridge_regression_fit_regularization() {
        let n = 30;
        let m = 10;

        // Create X
        let x: Vec<f64> = (0..n * m)
            .map(|i| ((i * 17) % 100) as f64 / 100.0)
            .collect();
        let y: Vec<f64> = (0..n).map(|i| (i as f64).sin()).collect();

        let low_lambda = ridge_regression_fit(&x, &y, n, m, 0.001, true);
        let high_lambda = ridge_regression_fit(&x, &y, n, m, 100.0, true);

        // Higher lambda should give smaller coefficient norm
        let norm_low: f64 = low_lambda
            .coefficients
            .iter()
            .map(|c| c.powi(2))
            .sum::<f64>()
            .sqrt();
        let norm_high: f64 = high_lambda
            .coefficients
            .iter()
            .map(|c| c.powi(2))
            .sum::<f64>()
            .sqrt();

        assert!(
            norm_high <= norm_low + 1e-6,
            "Higher lambda should shrink coefficients: {} vs {}",
            norm_high,
            norm_low
        );
    }

    #[cfg(feature = "linalg")]
    #[test]
    fn test_ridge_regression_fit_residuals() {
        let n = 20;
        let m = 3;

        let x: Vec<f64> = (0..n * m).map(|i| i as f64 / (n * m) as f64).collect();
        let y: Vec<f64> = (0..n).map(|i| i as f64 / n as f64).collect();

        let result = ridge_regression_fit(&x, &y, n, m, 0.1, true);

        // Check residuals = y - fitted
        for i in 0..n {
            let expected_resid = y[i] - result.fitted_values[i];
            assert!(
                (result.residuals[i] - expected_resid).abs() < 1e-10,
                "Residual mismatch at {}",
                i
            );
        }
    }

    #[cfg(feature = "linalg")]
    #[test]
    fn test_ridge_regression_fit_no_intercept() {
        let n = 30;
        let m = 5;

        let x: Vec<f64> = (0..n * m).map(|i| i as f64 / (n * m) as f64).collect();
        let y: Vec<f64> = (0..n).map(|i| i as f64 / n as f64).collect();

        let result = ridge_regression_fit(&x, &y, n, m, 0.1, false);

        assert!(result.error.is_none());
        // Intercept should be 0 when not fitting
        assert!(
            result.intercept.abs() < 1e-10,
            "Intercept should be 0, got {}",
            result.intercept
        );
    }

    #[cfg(feature = "linalg")]
    #[test]
    fn test_ridge_regression_fit_invalid_input() {
        // Empty data
        let result = ridge_regression_fit(&[], &[], 0, 5, 0.1, true);
        assert!(result.error.is_some());

        // Mismatched dimensions
        let x = vec![0.0; 50];
        let y = vec![0.0; 10];
        let result = ridge_regression_fit(&x, &y, 10, 10, 0.1, true); // x should be 10*10=100
        assert!(result.error.is_some());
    }
}