Skip to main content

fdars_core/
regression.rs

1//! Regression functions for functional data.
2//!
3//! This module provides functional PCA, PLS, and ridge regression.
4
5use crate::error::FdarError;
6use crate::matrix::FdMatrix;
7#[cfg(feature = "linalg")]
8use anofox_regression::solvers::RidgeRegressor;
9#[cfg(feature = "linalg")]
10use anofox_regression::{FittedRegressor, Regressor};
11use nalgebra::SVD;
12
/// Result of functional PCA.
#[derive(Debug, Clone, PartialEq)]
pub struct FpcaResult {
    /// Singular values of the centered data matrix, one per extracted component
    pub singular_values: Vec<f64>,
    /// Rotation matrix (loadings), m x ncomp: one loading column per component
    pub rotation: FdMatrix,
    /// Scores matrix, n x ncomp (U * S from the SVD of the centered data)
    pub scores: FdMatrix,
    /// Mean function: per-column means of the training data, length m
    pub mean: Vec<f64>,
    /// Centered data, n x m (training data with the mean function subtracted)
    pub centered: FdMatrix,
}
27
28impl FpcaResult {
29    /// Project new functional data onto the FPC score space.
30    ///
31    /// Centers the input data by subtracting the mean function estimated
32    /// during FPCA, then multiplies by the rotation (loadings) matrix to
33    /// obtain FPC scores for the new observations.
34    ///
35    /// # Arguments
36    /// * `data` - Matrix (n_new x m) of new observations
37    ///
38    /// # Errors
39    ///
40    /// Returns [`FdarError::InvalidDimension`] if the number of columns in
41    /// `data` does not match the length of the mean vector (i.e. the number
42    /// of evaluation points used during FPCA).
43    ///
44    /// # Examples
45    ///
46    /// ```
47    /// use fdars_core::matrix::FdMatrix;
48    /// use fdars_core::regression::fdata_to_pc_1d;
49    ///
50    /// let data = FdMatrix::from_column_major(
51    ///     (0..50).map(|i| (i as f64 * 0.1).sin()).collect(),
52    ///     5, 10,
53    /// ).unwrap();
54    /// let fpca = fdata_to_pc_1d(&data, 3).unwrap();
55    ///
56    /// // Project the original data (scores should match)
57    /// let scores = fpca.project(&data).unwrap();
58    /// assert_eq!(scores.shape(), (5, 3));
59    ///
60    /// // Project new data
61    /// let new_data = FdMatrix::from_column_major(
62    ///     (0..20).map(|i| (i as f64 * 0.2).cos()).collect(),
63    ///     2, 10,
64    /// ).unwrap();
65    /// let new_scores = fpca.project(&new_data).unwrap();
66    /// assert_eq!(new_scores.shape(), (2, 3));
67    /// ```
68    pub fn project(&self, data: &FdMatrix) -> Result<FdMatrix, FdarError> {
69        let (n, m) = data.shape();
70        let ncomp = self.rotation.ncols();
71        if m != self.mean.len() {
72            return Err(FdarError::InvalidDimension {
73                parameter: "data",
74                expected: format!("{} columns", self.mean.len()),
75                actual: format!("{m} columns"),
76            });
77        }
78
79        let mut scores = FdMatrix::zeros(n, ncomp);
80        for i in 0..n {
81            for k in 0..ncomp {
82                let mut sum = 0.0;
83                for j in 0..m {
84                    sum += (data[(i, j)] - self.mean[j]) * self.rotation[(j, k)];
85                }
86                scores[(i, k)] = sum;
87            }
88        }
89        Ok(scores)
90    }
91
92    /// Reconstruct functional data from FPC scores.
93    ///
94    /// Computes the approximation of functional data using the first
95    /// `ncomp` principal components:
96    /// `data[i, j] = mean[j] + sum_k scores[i, k] * rotation[j, k]`
97    ///
98    /// # Arguments
99    /// * `scores` - Score matrix (n x p) where p >= `ncomp`
100    /// * `ncomp` - Number of components to use for reconstruction
101    ///
102    /// # Errors
103    ///
104    /// Returns [`FdarError::InvalidParameter`] if `ncomp` is zero or exceeds
105    /// the number of columns in `scores` or the number of available components
106    /// in the rotation matrix.
107    ///
108    /// # Examples
109    ///
110    /// ```
111    /// use fdars_core::matrix::FdMatrix;
112    /// use fdars_core::regression::fdata_to_pc_1d;
113    ///
114    /// let data = FdMatrix::from_column_major(
115    ///     (0..100).map(|i| (i as f64 * 0.1).sin()).collect(),
116    ///     10, 10,
117    /// ).unwrap();
118    /// let fpca = fdata_to_pc_1d(&data, 5).unwrap();
119    ///
120    /// // Reconstruct using all 5 components
121    /// let recon = fpca.reconstruct(&fpca.scores, 5).unwrap();
122    /// assert_eq!(recon.shape(), (10, 10));
123    ///
124    /// // Reconstruct using fewer components
125    /// let recon2 = fpca.reconstruct(&fpca.scores, 2).unwrap();
126    /// assert_eq!(recon2.shape(), (10, 10));
127    /// ```
128    pub fn reconstruct(&self, scores: &FdMatrix, ncomp: usize) -> Result<FdMatrix, FdarError> {
129        let (n, p) = scores.shape();
130        let m = self.mean.len();
131        let max_comp = self.rotation.ncols().min(p);
132        if ncomp == 0 {
133            return Err(FdarError::InvalidParameter {
134                parameter: "ncomp",
135                message: "ncomp must be >= 1".to_string(),
136            });
137        }
138        if ncomp > max_comp {
139            return Err(FdarError::InvalidParameter {
140                parameter: "ncomp",
141                message: format!("ncomp={ncomp} exceeds available components ({max_comp})"),
142            });
143        }
144
145        let mut recon = FdMatrix::zeros(n, m);
146        for i in 0..n {
147            for j in 0..m {
148                let mut val = self.mean[j];
149                for k in 0..ncomp {
150                    val += scores[(i, k)] * self.rotation[(j, k)];
151                }
152                recon[(i, j)] = val;
153            }
154        }
155        Ok(recon)
156    }
157}
158
159/// Center columns of a matrix and return (centered_matrix, column_means).
160fn center_columns(data: &FdMatrix) -> (FdMatrix, Vec<f64>) {
161    let (n, m) = data.shape();
162    let mut centered = FdMatrix::zeros(n, m);
163    let mut means = vec![0.0; m];
164    for j in 0..m {
165        let col = data.column(j);
166        let mean = col.iter().sum::<f64>() / n as f64;
167        means[j] = mean;
168        let out_col = centered.column_mut(j);
169        for i in 0..n {
170            out_col[i] = col[i] - mean;
171        }
172    }
173    (centered, means)
174}
175
176/// Extract rotation (V) and scores (U*S) from SVD results.
177fn extract_pc_components(
178    svd: &SVD<f64, nalgebra::Dyn, nalgebra::Dyn>,
179    n: usize,
180    m: usize,
181    ncomp: usize,
182) -> Option<(Vec<f64>, FdMatrix, FdMatrix)> {
183    let singular_values: Vec<f64> = svd.singular_values.iter().take(ncomp).copied().collect();
184
185    let v_t = svd.v_t.as_ref()?;
186    let mut rotation = FdMatrix::zeros(m, ncomp);
187    for k in 0..ncomp {
188        for j in 0..m {
189            rotation[(j, k)] = v_t[(k, j)];
190        }
191    }
192
193    let u = svd.u.as_ref()?;
194    let mut scores = FdMatrix::zeros(n, ncomp);
195    for k in 0..ncomp {
196        let sv_k = singular_values[k];
197        for i in 0..n {
198            scores[(i, k)] = u[(i, k)] * sv_k;
199        }
200    }
201
202    Some((singular_values, rotation, scores))
203}
204
205/// Perform functional PCA via SVD on centered data.
206///
207/// # Arguments
208/// * `data` - Matrix (n x m): n observations, m evaluation points
209/// * `ncomp` - Number of components to extract
210///
211/// # Errors
212///
213/// Returns [`FdarError::InvalidDimension`] if `data` has zero rows or zero columns.
214/// Returns [`FdarError::InvalidParameter`] if `ncomp` is zero.
215/// Returns [`FdarError::ComputationFailed`] if the SVD decomposition fails to
216/// produce U or V_t matrices.
217///
218/// # Examples
219///
220/// ```
221/// use fdars_core::matrix::FdMatrix;
222/// use fdars_core::regression::fdata_to_pc_1d;
223///
224/// // 5 curves, each evaluated at 10 points
225/// let data = FdMatrix::from_column_major(
226///     (0..50).map(|i| (i as f64 * 0.1).sin()).collect(),
227///     5, 10,
228/// ).unwrap();
229/// let result = fdata_to_pc_1d(&data, 3).unwrap();
230/// assert_eq!(result.scores.shape(), (5, 3));
231/// assert_eq!(result.rotation.shape(), (10, 3));
232/// assert_eq!(result.mean.len(), 10);
233/// ```
234#[must_use = "expensive computation whose result should not be discarded"]
235pub fn fdata_to_pc_1d(data: &FdMatrix, ncomp: usize) -> Result<FpcaResult, FdarError> {
236    let (n, m) = data.shape();
237    if n == 0 {
238        return Err(FdarError::InvalidDimension {
239            parameter: "data",
240            expected: "n > 0 rows".to_string(),
241            actual: format!("n = {n}"),
242        });
243    }
244    if m == 0 {
245        return Err(FdarError::InvalidDimension {
246            parameter: "data",
247            expected: "m > 0 columns".to_string(),
248            actual: format!("m = {m}"),
249        });
250    }
251    if ncomp < 1 {
252        return Err(FdarError::InvalidParameter {
253            parameter: "ncomp",
254            message: format!("ncomp must be >= 1, got {ncomp}"),
255        });
256    }
257
258    let ncomp = ncomp.min(n).min(m);
259    let (centered, means) = center_columns(data);
260    let svd = SVD::new(centered.to_dmatrix(), true, true);
261    let (singular_values, rotation, scores) =
262        extract_pc_components(&svd, n, m, ncomp).ok_or_else(|| FdarError::ComputationFailed {
263            operation: "SVD",
264            detail: "failed to extract U or V_t from SVD decomposition; try reducing ncomp or check for zero-variance columns in the data".to_string(),
265        })?;
266
267    Ok(FpcaResult {
268        singular_values,
269        rotation,
270        scores,
271        mean: means,
272        centered,
273    })
274}
275
/// Result of PLS regression.
#[derive(Debug, Clone, PartialEq)]
pub struct PlsResult {
    /// Weight vectors, m x ncomp (each column normalized to unit length
    /// unless numerically degenerate)
    pub weights: FdMatrix,
    /// Score vectors, n x ncomp (one score column per component)
    pub scores: FdMatrix,
    /// Loading vectors, m x ncomp (used for deflation during projection)
    pub loadings: FdMatrix,
    /// Column means of the training data, length m
    pub x_means: Vec<f64>,
}
288
289impl PlsResult {
290    /// Project new functional data onto the PLS score space.
291    ///
292    /// Centers the input data by subtracting the column means estimated
293    /// during PLS fitting, then iteratively projects and deflates through
294    /// each PLS component using the stored weight and loading vectors.
295    ///
296    /// # Arguments
297    /// * `data` - Matrix (n_new x m) of new observations
298    ///
299    /// # Errors
300    ///
301    /// Returns [`FdarError::InvalidDimension`] if the number of columns in
302    /// `data` does not match the number of predictor variables used during
303    /// PLS fitting.
304    ///
305    /// # Examples
306    ///
307    /// ```
308    /// use fdars_core::matrix::FdMatrix;
309    /// use fdars_core::regression::fdata_to_pls_1d;
310    ///
311    /// let x = FdMatrix::from_column_major(
312    ///     (0..100).map(|i| (i as f64 * 0.1).sin()).collect(),
313    ///     10, 10,
314    /// ).unwrap();
315    /// let y: Vec<f64> = (0..10).map(|i| i as f64 * 0.5).collect();
316    /// let pls = fdata_to_pls_1d(&x, &y, 3).unwrap();
317    ///
318    /// // Project the original data
319    /// let scores = pls.project(&x).unwrap();
320    /// assert_eq!(scores.shape(), (10, 3));
321    ///
322    /// // Project new data
323    /// let new_x = FdMatrix::from_column_major(
324    ///     (0..20).map(|i| (i as f64 * 0.2).cos()).collect(),
325    ///     2, 10,
326    /// ).unwrap();
327    /// let new_scores = pls.project(&new_x).unwrap();
328    /// assert_eq!(new_scores.shape(), (2, 3));
329    /// ```
330    pub fn project(&self, data: &FdMatrix) -> Result<FdMatrix, FdarError> {
331        let (n, m) = data.shape();
332        let ncomp = self.weights.ncols();
333        if m != self.x_means.len() {
334            return Err(FdarError::InvalidDimension {
335                parameter: "data",
336                expected: format!("{} columns", self.x_means.len()),
337                actual: format!("{m} columns"),
338            });
339        }
340
341        // Center data
342        let mut x_cen = FdMatrix::zeros(n, m);
343        for j in 0..m {
344            for i in 0..n {
345                x_cen[(i, j)] = data[(i, j)] - self.x_means[j];
346            }
347        }
348
349        // Iteratively project and deflate through each component
350        let mut scores = FdMatrix::zeros(n, ncomp);
351        for k in 0..ncomp {
352            // Compute scores: t = X_cen * w_k
353            for i in 0..n {
354                let mut sum = 0.0;
355                for j in 0..m {
356                    sum += x_cen[(i, j)] * self.weights[(j, k)];
357                }
358                scores[(i, k)] = sum;
359            }
360
361            // Deflate: X_cen -= t * p_k'
362            for j in 0..m {
363                let p_jk = self.loadings[(j, k)];
364                for i in 0..n {
365                    x_cen[(i, j)] -= scores[(i, k)] * p_jk;
366                }
367            }
368        }
369
370        Ok(scores)
371    }
372}
373
374/// Compute PLS weight vector: w = X'y / ||X'y||
375fn pls_compute_weights(x_cen: &FdMatrix, y_cen: &[f64]) -> Vec<f64> {
376    let (n, m) = x_cen.shape();
377    let mut w: Vec<f64> = (0..m)
378        .map(|j| {
379            let mut sum = 0.0;
380            for i in 0..n {
381                sum += x_cen[(i, j)] * y_cen[i];
382            }
383            sum
384        })
385        .collect();
386
387    let w_norm: f64 = w.iter().map(|&wi| wi * wi).sum::<f64>().sqrt();
388    if w_norm > 1e-10 {
389        for wi in &mut w {
390            *wi /= w_norm;
391        }
392    }
393    w
394}
395
396/// Compute PLS scores: t = Xw
397fn pls_compute_scores(x_cen: &FdMatrix, w: &[f64]) -> Vec<f64> {
398    let (n, m) = x_cen.shape();
399    (0..n)
400        .map(|i| {
401            let mut sum = 0.0;
402            for j in 0..m {
403                sum += x_cen[(i, j)] * w[j];
404            }
405            sum
406        })
407        .collect()
408}
409
410/// Compute PLS loadings: p = X't / (t't)
411fn pls_compute_loadings(x_cen: &FdMatrix, t: &[f64], t_norm_sq: f64) -> Vec<f64> {
412    let (n, m) = x_cen.shape();
413    (0..m)
414        .map(|j| {
415            let mut sum = 0.0;
416            for i in 0..n {
417                sum += x_cen[(i, j)] * t[i];
418            }
419            sum / t_norm_sq.max(1e-10)
420        })
421        .collect()
422}
423
424/// Deflate X by removing the rank-1 component t * p'
425fn pls_deflate_x(x_cen: &mut FdMatrix, t: &[f64], p: &[f64]) {
426    let (n, m) = x_cen.shape();
427    for j in 0..m {
428        for i in 0..n {
429            x_cen[(i, j)] -= t[i] * p[j];
430        }
431    }
432}
433
434/// Execute one NIPALS step: compute weights/scores/loadings and deflate X and y.
435fn pls_nipals_step(
436    k: usize,
437    x_cen: &mut FdMatrix,
438    y_cen: &mut [f64],
439    weights: &mut FdMatrix,
440    scores: &mut FdMatrix,
441    loadings: &mut FdMatrix,
442) {
443    let n = x_cen.nrows();
444    let m = x_cen.ncols();
445
446    let w = pls_compute_weights(x_cen, y_cen);
447    let t = pls_compute_scores(x_cen, &w);
448    let t_norm_sq: f64 = t.iter().map(|&ti| ti * ti).sum();
449    let p = pls_compute_loadings(x_cen, &t, t_norm_sq);
450
451    for j in 0..m {
452        weights[(j, k)] = w[j];
453        loadings[(j, k)] = p[j];
454    }
455    for i in 0..n {
456        scores[(i, k)] = t[i];
457    }
458
459    pls_deflate_x(x_cen, &t, &p);
460    let t_y: f64 = t.iter().zip(y_cen.iter()).map(|(&ti, &yi)| ti * yi).sum();
461    let q = t_y / t_norm_sq.max(1e-10);
462    for i in 0..n {
463        y_cen[i] -= t[i] * q;
464    }
465}
466
467/// Perform PLS via NIPALS algorithm.
468///
469/// # Arguments
470/// * `data` - Matrix (n x m): n observations, m evaluation points
471/// * `y` - Response vector (length n)
472/// * `ncomp` - Number of components to extract
473///
474/// # Errors
475///
476/// Returns [`FdarError::InvalidDimension`] if `data` has zero rows or zero
477/// columns, or if `y.len()` does not equal the number of rows in `data`.
478/// Returns [`FdarError::InvalidParameter`] if `ncomp` is zero.
479#[must_use = "expensive computation whose result should not be discarded"]
480pub fn fdata_to_pls_1d(data: &FdMatrix, y: &[f64], ncomp: usize) -> Result<PlsResult, FdarError> {
481    let (n, m) = data.shape();
482    if n == 0 {
483        return Err(FdarError::InvalidDimension {
484            parameter: "data",
485            expected: "n > 0 rows".to_string(),
486            actual: format!("n = {n}"),
487        });
488    }
489    if m == 0 {
490        return Err(FdarError::InvalidDimension {
491            parameter: "data",
492            expected: "m > 0 columns".to_string(),
493            actual: format!("m = {m}"),
494        });
495    }
496    if y.len() != n {
497        return Err(FdarError::InvalidDimension {
498            parameter: "y",
499            expected: format!("length {n}"),
500            actual: format!("length {}", y.len()),
501        });
502    }
503    if ncomp < 1 {
504        return Err(FdarError::InvalidParameter {
505            parameter: "ncomp",
506            message: format!("ncomp must be >= 1, got {ncomp}"),
507        });
508    }
509
510    let ncomp = ncomp.min(n).min(m);
511
512    // Center X and y
513    let x_means: Vec<f64> = (0..m)
514        .map(|j| {
515            let col = data.column(j);
516            let sum: f64 = col.iter().sum();
517            sum / n as f64
518        })
519        .collect();
520
521    let y_mean: f64 = y.iter().sum::<f64>() / n as f64;
522
523    let mut x_cen = FdMatrix::zeros(n, m);
524    for j in 0..m {
525        for i in 0..n {
526            x_cen[(i, j)] = data[(i, j)] - x_means[j];
527        }
528    }
529
530    let mut y_cen: Vec<f64> = y.iter().map(|&yi| yi - y_mean).collect();
531
532    let mut weights = FdMatrix::zeros(m, ncomp);
533    let mut scores = FdMatrix::zeros(n, ncomp);
534    let mut loadings = FdMatrix::zeros(m, ncomp);
535
536    // NIPALS algorithm
537    for k in 0..ncomp {
538        pls_nipals_step(
539            k,
540            &mut x_cen,
541            &mut y_cen,
542            &mut weights,
543            &mut scores,
544            &mut loadings,
545        );
546    }
547
548    Ok(PlsResult {
549        weights,
550        scores,
551        loadings,
552        x_means,
553    })
554}
555
/// Result of ridge regression fit.
///
/// Failures are reported through the `error` field rather than a `Result`,
/// so a failed fit still yields a (mostly empty) `RidgeResult`.
#[derive(Debug, Clone)]
#[cfg(feature = "linalg")]
pub struct RidgeResult {
    /// Estimated coefficients, one per predictor column (empty on error)
    pub coefficients: Vec<f64>,
    /// Intercept (0.0 if the underlying fit reports none, or on error)
    pub intercept: f64,
    /// Fitted values, one per observation (empty on error)
    pub fitted_values: Vec<f64>,
    /// Residuals `y - fitted`, one per observation (empty on error)
    pub residuals: Vec<f64>,
    /// R-squared (0.0 when the response has no variance, or on error)
    pub r_squared: f64,
    /// Lambda (regularization strength) used for the fit
    pub lambda: f64,
    /// Error message if the fit failed; `None` on success
    pub error: Option<String>,
}
575
/// Fit ridge regression.
///
/// Converts the inputs to `faer` types, fits a `RidgeRegressor`, then
/// derives fitted values, residuals and R-squared. Failures are reported
/// through [`RidgeResult`]'s `error` field rather than a `Result`.
///
/// # Arguments
/// * `x` - Predictor matrix (n x m)
/// * `y` - Response vector (length n)
/// * `lambda` - Regularization parameter
/// * `with_intercept` - Whether to include intercept
#[cfg(feature = "linalg")]
#[must_use = "expensive computation whose result should not be discarded"]
pub fn ridge_regression_fit(
    x: &FdMatrix,
    y: &[f64],
    lambda: f64,
    with_intercept: bool,
) -> RidgeResult {
    let (n, m) = x.shape();

    // Single constructor for every failure path (was duplicated verbatim).
    let failure = |message: String| RidgeResult {
        coefficients: Vec::new(),
        intercept: 0.0,
        fitted_values: Vec::new(),
        residuals: Vec::new(),
        r_squared: 0.0,
        lambda,
        error: Some(message),
    };

    if n == 0 || m == 0 || y.len() != n {
        return failure("Invalid input dimensions".to_string());
    }

    // Convert to faer Mat/Col format expected by the solver.
    let x_faer = faer::Mat::from_fn(n, m, |i, j| x[(i, j)]);
    let y_faer = faer::Col::from_fn(n, |i| y[i]);

    // Build and fit the ridge regressor.
    let regressor = RidgeRegressor::builder()
        .with_intercept(with_intercept)
        .lambda(lambda)
        .build();

    let fitted = match regressor.fit(&x_faer, &y_faer) {
        Ok(f) => f,
        Err(e) => return failure(format!("Fit failed: {e:?}")),
    };

    // Extract coefficients and intercept (0.0 when fitted without one).
    let coefs = fitted.coefficients();
    let coefficients: Vec<f64> = (0..coefs.nrows()).map(|i| coefs[i]).collect();
    let intercept = fitted.intercept().unwrap_or(0.0);

    // Fitted values: intercept + <row i of x, coefficients>.
    let mut fitted_values = vec![0.0; n];
    for i in 0..n {
        let mut pred = intercept;
        for j in 0..m {
            pred += x[(i, j)] * coefficients[j];
        }
        fitted_values[i] = pred;
    }

    // Residuals: y - fitted.
    let residuals: Vec<f64> = y
        .iter()
        .zip(fitted_values.iter())
        .map(|(&yi, &yhat)| yi - yhat)
        .collect();

    // R^2 = 1 - SS_res / SS_tot; defined as 0.0 for a constant response.
    let y_mean: f64 = y.iter().sum::<f64>() / n as f64;
    let ss_tot: f64 = y.iter().map(|&yi| (yi - y_mean).powi(2)).sum();
    let ss_res: f64 = residuals.iter().map(|&r| r.powi(2)).sum();
    let r_squared = if ss_tot > 0.0 {
        1.0 - ss_res / ss_tot
    } else {
        0.0
    };

    RidgeResult {
        coefficients,
        intercept,
        fitted_values,
        residuals,
        r_squared,
        lambda,
        error: None,
    }
}
673
674#[cfg(test)]
675mod tests {
676    use super::*;
677    use std::f64::consts::PI;
678
679    /// Generate functional data with known structure for testing
680    fn generate_test_fdata(n: usize, m: usize) -> (FdMatrix, Vec<f64>) {
681        let t: Vec<f64> = (0..m).map(|j| j as f64 / (m - 1) as f64).collect();
682
683        // Create n curves: sine waves with varying phase
684        let mut data = FdMatrix::zeros(n, m);
685        for i in 0..n {
686            let phase = (i as f64 / n as f64) * PI;
687            for j in 0..m {
688                data[(i, j)] = (2.0 * PI * t[j] + phase).sin();
689            }
690        }
691
692        (data, t)
693    }
694
695    // ============== FPCA tests ==============
696
697    #[test]
698    fn test_fdata_to_pc_1d_basic() {
699        let n = 20;
700        let m = 50;
701        let ncomp = 3;
702        let (data, _) = generate_test_fdata(n, m);
703
704        let result = fdata_to_pc_1d(&data, ncomp);
705        assert!(result.is_ok());
706
707        let fpca = result.unwrap();
708        assert_eq!(fpca.singular_values.len(), ncomp);
709        assert_eq!(fpca.rotation.shape(), (m, ncomp));
710        assert_eq!(fpca.scores.shape(), (n, ncomp));
711        assert_eq!(fpca.mean.len(), m);
712        assert_eq!(fpca.centered.shape(), (n, m));
713    }
714
    #[test]
    fn test_fdata_to_pc_1d_singular_values_decreasing() {
        let n = 20;
        let m = 50;
        let ncomp = 5;
        let (data, _) = generate_test_fdata(n, m);

        let fpca = fdata_to_pc_1d(&data, ncomp).unwrap();

        // Singular values should be in decreasing order
        // (the 1e-10 slack absorbs SVD rounding of near-equal values).
        for i in 1..fpca.singular_values.len() {
            assert!(
                fpca.singular_values[i] <= fpca.singular_values[i - 1] + 1e-10,
                "Singular values should be decreasing"
            );
        }
    }
732
733    #[test]
734    fn test_fdata_to_pc_1d_centered_has_zero_mean() {
735        let n = 20;
736        let m = 50;
737        let (data, _) = generate_test_fdata(n, m);
738
739        let fpca = fdata_to_pc_1d(&data, 3).unwrap();
740
741        // Column means of centered data should be zero
742        for j in 0..m {
743            let col_mean: f64 = (0..n).map(|i| fpca.centered[(i, j)]).sum::<f64>() / n as f64;
744            assert!(
745                col_mean.abs() < 1e-10,
746                "Centered data should have zero column mean"
747            );
748        }
749    }
750
    #[test]
    fn test_fdata_to_pc_1d_ncomp_limits() {
        let n = 10;
        let m = 50;
        let (data, _) = generate_test_fdata(n, m);

        // Request more components than n - should cap at n
        // (fdata_to_pc_1d clamps ncomp to min(n, m) instead of erroring).
        let fpca = fdata_to_pc_1d(&data, 20).unwrap();
        assert!(fpca.singular_values.len() <= n);
    }
761
762    #[test]
763    fn test_fdata_to_pc_1d_invalid_input() {
764        // Empty data
765        let empty = FdMatrix::zeros(0, 50);
766        let result = fdata_to_pc_1d(&empty, 3);
767        assert!(result.is_err());
768
769        // Zero components
770        let (data, _) = generate_test_fdata(10, 50);
771        let result = fdata_to_pc_1d(&data, 0);
772        assert!(result.is_err());
773    }
774
    #[test]
    fn test_fdata_to_pc_1d_reconstruction() {
        let n = 10;
        let m = 30;
        let (data, _) = generate_test_fdata(n, m);

        // Use all components for perfect reconstruction
        let ncomp = n.min(m);
        let fpca = fdata_to_pc_1d(&data, ncomp).unwrap();

        // Reconstruct: X_centered = scores * rotation^T
        // With all min(n, m) components retained, this should recover the
        // centered data up to a loose numerical tolerance.
        for i in 0..n {
            for j in 0..m {
                let mut reconstructed = 0.0;
                for k in 0..ncomp {
                    let score = fpca.scores[(i, k)];
                    let loading = fpca.rotation[(j, k)];
                    reconstructed += score * loading;
                }
                let original_centered = fpca.centered[(i, j)];
                assert!(
                    (reconstructed - original_centered).abs() < 0.1,
                    "Reconstruction error at ({}, {}): {} vs {}",
                    i,
                    j,
                    reconstructed,
                    original_centered
                );
            }
        }
    }
806
807    // ============== PLS tests ==============
808
    #[test]
    fn test_fdata_to_pls_1d_basic() {
        let n = 20;
        let m = 30;
        let ncomp = 3;
        let (x, _) = generate_test_fdata(n, m);

        // Create y with some relationship to x
        let y: Vec<f64> = (0..n).map(|i| (i as f64 / n as f64) + 0.1).collect();

        let result = fdata_to_pls_1d(&x, &y, ncomp);
        assert!(result.is_ok());

        // Output matrices must have the documented shapes.
        let pls = result.unwrap();
        assert_eq!(pls.weights.shape(), (m, ncomp));
        assert_eq!(pls.scores.shape(), (n, ncomp));
        assert_eq!(pls.loadings.shape(), (m, ncomp));
    }
827
    #[test]
    fn test_fdata_to_pls_1d_weights_normalized() {
        let n = 20;
        let m = 30;
        let ncomp = 2;
        let (x, _) = generate_test_fdata(n, m);
        let y: Vec<f64> = (0..n).map(|i| i as f64).collect();

        let pls = fdata_to_pls_1d(&x, &y, ncomp).unwrap();

        // Weight vectors should be approximately unit norm
        // (pls_compute_weights normalizes unless the vector is ~0).
        for k in 0..ncomp {
            let norm: f64 = (0..m)
                .map(|j| pls.weights[(j, k)].powi(2))
                .sum::<f64>()
                .sqrt();
            assert!(
                (norm - 1.0).abs() < 0.1,
                "Weight vector {} should be unit norm, got {}",
                k,
                norm
            );
        }
    }
852
853    #[test]
854    fn test_fdata_to_pls_1d_invalid_input() {
855        let (x, _) = generate_test_fdata(10, 30);
856
857        // Wrong y length
858        let result = fdata_to_pls_1d(&x, &[0.0; 5], 2);
859        assert!(result.is_err());
860
861        // Zero components
862        let y = vec![0.0; 10];
863        let result = fdata_to_pls_1d(&x, &y, 0);
864        assert!(result.is_err());
865    }
866
867    // ============== Ridge regression tests ==============
868
    #[cfg(feature = "linalg")]
    #[test]
    fn test_ridge_regression_fit_basic() {
        let n = 50;
        let m = 5;

        // Create X with known structure
        let mut x = FdMatrix::zeros(n, m);
        for i in 0..n {
            for j in 0..m {
                x[(i, j)] = (i as f64 + j as f64) / (n + m) as f64;
            }
        }

        // Create y = sum of x columns + noise
        let y: Vec<f64> = (0..n)
            .map(|i| {
                let mut sum = 0.0;
                for j in 0..m {
                    sum += x[(i, j)];
                }
                sum + 0.01 * (i as f64 % 10.0)
            })
            .collect();

        let result = ridge_regression_fit(&x, &y, 0.1, true);

        // Success path: no error, and all output vectors sized correctly.
        assert!(result.error.is_none(), "Ridge should fit without error");
        assert_eq!(result.coefficients.len(), m);
        assert_eq!(result.fitted_values.len(), n);
        assert_eq!(result.residuals.len(), n);
    }
901
    #[cfg(feature = "linalg")]
    #[test]
    fn test_ridge_regression_fit_r_squared() {
        let n = 50;
        let m = 3;

        // Linearly structured X and y, so the fit should explain most variance.
        let x = FdMatrix::from_column_major(
            (0..n * m).map(|i| i as f64 / (n * m) as f64).collect(),
            n,
            m,
        )
        .unwrap();
        let y: Vec<f64> = (0..n).map(|i| i as f64 / n as f64).collect();

        let result = ridge_regression_fit(&x, &y, 0.01, true);

        assert!(
            result.r_squared > 0.5,
            "R-squared should be high, got {}",
            result.r_squared
        );
        assert!(result.r_squared <= 1.0 + 1e-10, "R-squared should be <= 1");
    }
925
    #[cfg(feature = "linalg")]
    #[test]
    fn test_ridge_regression_fit_regularization() {
        let n = 30;
        let m = 10;

        // Pseudo-random-ish predictors and a nonlinear response.
        let x = FdMatrix::from_column_major(
            (0..n * m)
                .map(|i| ((i * 17) % 100) as f64 / 100.0)
                .collect(),
            n,
            m,
        )
        .unwrap();
        let y: Vec<f64> = (0..n).map(|i| (i as f64).sin()).collect();

        // Fit with weak and strong regularization.
        let low_lambda = ridge_regression_fit(&x, &y, 0.001, true);
        let high_lambda = ridge_regression_fit(&x, &y, 100.0, true);

        // Compare L2 norms of the coefficient vectors: the ridge penalty
        // should shrink coefficients as lambda grows.
        let norm_low: f64 = low_lambda
            .coefficients
            .iter()
            .map(|c| c.powi(2))
            .sum::<f64>()
            .sqrt();
        let norm_high: f64 = high_lambda
            .coefficients
            .iter()
            .map(|c| c.powi(2))
            .sum::<f64>()
            .sqrt();

        assert!(
            norm_high <= norm_low + 1e-6,
            "Higher lambda should shrink coefficients: {} vs {}",
            norm_high,
            norm_low
        );
    }
965
966    #[cfg(feature = "linalg")]
967    #[test]
968    fn test_ridge_regression_fit_residuals() {
969        let n = 20;
970        let m = 3;
971
972        let x = FdMatrix::from_column_major(
973            (0..n * m).map(|i| i as f64 / (n * m) as f64).collect(),
974            n,
975            m,
976        )
977        .unwrap();
978        let y: Vec<f64> = (0..n).map(|i| i as f64 / n as f64).collect();
979
980        let result = ridge_regression_fit(&x, &y, 0.1, true);
981
982        for i in 0..n {
983            let expected_resid = y[i] - result.fitted_values[i];
984            assert!(
985                (result.residuals[i] - expected_resid).abs() < 1e-10,
986                "Residual mismatch at {}",
987                i
988            );
989        }
990    }
991
992    #[cfg(feature = "linalg")]
993    #[test]
994    fn test_ridge_regression_fit_no_intercept() {
995        let n = 30;
996        let m = 5;
997
998        let x = FdMatrix::from_column_major(
999            (0..n * m).map(|i| i as f64 / (n * m) as f64).collect(),
1000            n,
1001            m,
1002        )
1003        .unwrap();
1004        let y: Vec<f64> = (0..n).map(|i| i as f64 / n as f64).collect();
1005
1006        let result = ridge_regression_fit(&x, &y, 0.1, false);
1007
1008        assert!(result.error.is_none());
1009        assert!(
1010            result.intercept.abs() < 1e-10,
1011            "Intercept should be 0, got {}",
1012            result.intercept
1013        );
1014    }
1015
1016    #[cfg(feature = "linalg")]
1017    #[test]
1018    fn test_ridge_regression_fit_invalid_input() {
1019        let empty = FdMatrix::zeros(0, 5);
1020        let result = ridge_regression_fit(&empty, &[], 0.1, true);
1021        assert!(result.error.is_some());
1022
1023        let x = FdMatrix::zeros(10, 10);
1024        let y = vec![0.0; 5];
1025        let result = ridge_regression_fit(&x, &y, 0.1, true);
1026        assert!(result.error.is_some());
1027    }
1028
1029    #[test]
1030    fn test_all_zero_fpca() {
1031        // All-zero data: centering leaves zeros, SVD should return trivial result
1032        let n = 5;
1033        let m = 20;
1034        let data = FdMatrix::zeros(n, m);
1035        let result = fdata_to_pc_1d(&data, 2);
1036        // Should not panic; may return Ok with zero singular values
1037        if let Ok(res) = result {
1038            assert_eq!(res.scores.nrows(), n);
1039            for &sv in &res.singular_values {
1040                assert!(
1041                    sv.abs() < 1e-10,
1042                    "All-zero data should have zero singular values"
1043                );
1044            }
1045        }
1046    }
1047
1048    #[test]
1049    fn test_n1_pca() {
1050        // Single observation: centering leaves all zeros, SVD may return trivial result
1051        let data = FdMatrix::from_column_major(vec![1.0, 2.0, 3.0], 1, 3).unwrap();
1052        let result = fdata_to_pc_1d(&data, 1);
1053        // With n=1, centering leaves all zeros, so SVD may fail or return trivial result
1054        // Just ensure no panic
1055        let _ = result;
1056    }
1057
1058    #[test]
1059    fn test_constant_y_pls() {
1060        let n = 10;
1061        let m = 20;
1062        let data_vec: Vec<f64> = (0..n * m).map(|i| (i as f64 * 0.1).sin()).collect();
1063        let data = FdMatrix::from_column_major(data_vec, n, m).unwrap();
1064        let y = vec![5.0; n]; // Constant response
1065        let result = fdata_to_pls_1d(&data, &y, 2);
1066        // Constant y → centering makes y all zeros, PLS may fail
1067        // Just ensure no panic
1068        let _ = result;
1069    }
1070
1071    // ============== FpcaResult::project tests ==============
1072
1073    #[test]
1074    fn test_fpca_project_shape() {
1075        let n = 20;
1076        let m = 30;
1077        let ncomp = 3;
1078        let (data, _) = generate_test_fdata(n, m);
1079        let fpca = fdata_to_pc_1d(&data, ncomp).unwrap();
1080
1081        let new_data = FdMatrix::zeros(5, m);
1082        let scores = fpca.project(&new_data).unwrap();
1083        assert_eq!(scores.shape(), (5, ncomp));
1084    }
1085
1086    #[test]
1087    fn test_fpca_project_reproduces_training_scores() {
1088        let n = 20;
1089        let m = 30;
1090        let ncomp = 3;
1091        let (data, _) = generate_test_fdata(n, m);
1092        let fpca = fdata_to_pc_1d(&data, ncomp).unwrap();
1093
1094        // Projecting the training data should reproduce the original scores
1095        let scores = fpca.project(&data).unwrap();
1096        for i in 0..n {
1097            for k in 0..ncomp {
1098                assert!(
1099                    (scores[(i, k)] - fpca.scores[(i, k)]).abs() < 1e-8,
1100                    "Score mismatch at ({}, {}): {} vs {}",
1101                    i,
1102                    k,
1103                    scores[(i, k)],
1104                    fpca.scores[(i, k)]
1105                );
1106            }
1107        }
1108    }
1109
1110    #[test]
1111    fn test_fpca_project_dimension_mismatch() {
1112        let (data, _) = generate_test_fdata(20, 30);
1113        let fpca = fdata_to_pc_1d(&data, 3).unwrap();
1114
1115        let wrong_m = FdMatrix::zeros(5, 20); // wrong number of columns
1116        assert!(fpca.project(&wrong_m).is_err());
1117    }
1118
1119    // ============== FpcaResult::reconstruct tests ==============
1120
1121    #[test]
1122    fn test_fpca_reconstruct_shape() {
1123        let n = 10;
1124        let m = 30;
1125        let ncomp = 5;
1126        let (data, _) = generate_test_fdata(n, m);
1127        let fpca = fdata_to_pc_1d(&data, ncomp).unwrap();
1128
1129        let recon = fpca.reconstruct(&fpca.scores, 3).unwrap();
1130        assert_eq!(recon.shape(), (n, m));
1131    }
1132
1133    #[test]
1134    fn test_fpca_reconstruct_full_matches_original() {
1135        let n = 10;
1136        let m = 30;
1137        let ncomp = n.min(m);
1138        let (data, _) = generate_test_fdata(n, m);
1139        let fpca = fdata_to_pc_1d(&data, ncomp).unwrap();
1140
1141        // Full reconstruction should recover original data
1142        let recon = fpca.reconstruct(&fpca.scores, ncomp).unwrap();
1143        for i in 0..n {
1144            for j in 0..m {
1145                assert!(
1146                    (recon[(i, j)] - data[(i, j)]).abs() < 0.1,
1147                    "Reconstruction error at ({}, {}): {} vs {}",
1148                    i,
1149                    j,
1150                    recon[(i, j)],
1151                    data[(i, j)]
1152                );
1153            }
1154        }
1155    }
1156
1157    #[test]
1158    fn test_fpca_reconstruct_fewer_components() {
1159        let n = 20;
1160        let m = 30;
1161        let ncomp = 5;
1162        let (data, _) = generate_test_fdata(n, m);
1163        let fpca = fdata_to_pc_1d(&data, ncomp).unwrap();
1164
1165        let recon2 = fpca.reconstruct(&fpca.scores, 2).unwrap();
1166        let recon5 = fpca.reconstruct(&fpca.scores, 5).unwrap();
1167        assert_eq!(recon2.shape(), (n, m));
1168        assert_eq!(recon5.shape(), (n, m));
1169    }
1170
1171    #[test]
1172    fn test_fpca_reconstruct_invalid_ncomp() {
1173        let (data, _) = generate_test_fdata(10, 30);
1174        let fpca = fdata_to_pc_1d(&data, 3).unwrap();
1175
1176        // Zero components
1177        assert!(fpca.reconstruct(&fpca.scores, 0).is_err());
1178        // More components than available
1179        assert!(fpca.reconstruct(&fpca.scores, 10).is_err());
1180    }
1181
1182    // ============== PlsResult::project tests ==============
1183
1184    #[test]
1185    fn test_pls_project_shape() {
1186        let n = 20;
1187        let m = 30;
1188        let ncomp = 3;
1189        let (x, _) = generate_test_fdata(n, m);
1190        let y: Vec<f64> = (0..n).map(|i| i as f64).collect();
1191        let pls = fdata_to_pls_1d(&x, &y, ncomp).unwrap();
1192
1193        let new_x = FdMatrix::zeros(5, m);
1194        let scores = pls.project(&new_x).unwrap();
1195        assert_eq!(scores.shape(), (5, ncomp));
1196    }
1197
1198    #[test]
1199    fn test_pls_project_reproduces_training_scores() {
1200        let n = 20;
1201        let m = 30;
1202        let ncomp = 3;
1203        let (x, _) = generate_test_fdata(n, m);
1204        let y: Vec<f64> = (0..n).map(|i| (i as f64 / n as f64) + 0.1).collect();
1205        let pls = fdata_to_pls_1d(&x, &y, ncomp).unwrap();
1206
1207        // Projecting the training data should reproduce the original scores
1208        let scores = pls.project(&x).unwrap();
1209        for i in 0..n {
1210            for k in 0..ncomp {
1211                assert!(
1212                    (scores[(i, k)] - pls.scores[(i, k)]).abs() < 1e-8,
1213                    "Score mismatch at ({}, {}): {} vs {}",
1214                    i,
1215                    k,
1216                    scores[(i, k)],
1217                    pls.scores[(i, k)]
1218                );
1219            }
1220        }
1221    }
1222
1223    #[test]
1224    fn test_pls_project_dimension_mismatch() {
1225        let (x, _) = generate_test_fdata(20, 30);
1226        let y: Vec<f64> = (0..20).map(|i| i as f64).collect();
1227        let pls = fdata_to_pls_1d(&x, &y, 3).unwrap();
1228
1229        let wrong_m = FdMatrix::zeros(5, 20); // wrong number of columns
1230        assert!(pls.project(&wrong_m).is_err());
1231    }
1232
1233    #[test]
1234    fn test_pls_x_means_stored() {
1235        let n = 20;
1236        let m = 30;
1237        let (x, _) = generate_test_fdata(n, m);
1238        let y: Vec<f64> = (0..n).map(|i| i as f64).collect();
1239        let pls = fdata_to_pls_1d(&x, &y, 3).unwrap();
1240
1241        // x_means should be stored and have correct length
1242        assert_eq!(pls.x_means.len(), m);
1243    }
1244}