// fdars_core/function_on_scalar.rs
1//! Function-on-scalar regression and functional ANOVA.
2//!
3//! Predicts a **functional response** from scalar/categorical predictors:
4//! ```text
5//! X_i(t) = μ(t) + Σⱼ βⱼ(t) · z_ij + ε_i(t)
6//! ```
7//!
8//! # Methods
9//!
10//! - [`fosr`]: Penalized function-on-scalar regression (pointwise OLS + smoothing)
11//! - [`fanova`]: Functional ANOVA with permutation-based global test
12//! - [`predict_fosr`]: Predict new curves from fitted model
13
14use crate::error::FdarError;
15use crate::iter_maybe_parallel;
16use crate::matrix::FdMatrix;
17use crate::regression::fdata_to_pc_1d;
18#[cfg(feature = "parallel")]
19use rayon::iter::ParallelIterator;
20
21// ---------------------------------------------------------------------------
22// Linear algebra helpers (self-contained)
23// ---------------------------------------------------------------------------
24
25/// Cholesky factorization: A = LL'. Returns L (p×p flat row-major) or None if singular.
26fn cholesky_factor(a: &[f64], p: usize) -> Option<Vec<f64>> {
27    let mut l = vec![0.0; p * p];
28    for j in 0..p {
29        let mut diag = a[j * p + j];
30        for k in 0..j {
31            diag -= l[j * p + k] * l[j * p + k];
32        }
33        if diag <= 1e-12 {
34            return None;
35        }
36        l[j * p + j] = diag.sqrt();
37        for i in (j + 1)..p {
38            let mut s = a[i * p + j];
39            for k in 0..j {
40                s -= l[i * p + k] * l[j * p + k];
41            }
42            l[i * p + j] = s / l[j * p + j];
43        }
44    }
45    Some(l)
46}
47
48/// Solve Lz = b (forward) then L'x = z (back).
49fn cholesky_forward_back(l: &[f64], b: &[f64], p: usize) -> Vec<f64> {
50    let mut z = b.to_vec();
51    for j in 0..p {
52        for k in 0..j {
53            z[j] -= l[j * p + k] * z[k];
54        }
55        z[j] /= l[j * p + j];
56    }
57    for j in (0..p).rev() {
58        for k in (j + 1)..p {
59            z[j] -= l[k * p + j] * z[k];
60        }
61        z[j] /= l[j * p + j];
62    }
63    z
64}
65
66/// Compute X'X (symmetric, p×p stored flat row-major).
67pub(crate) fn compute_xtx(x: &FdMatrix) -> Vec<f64> {
68    let (n, p) = x.shape();
69    let mut xtx = vec![0.0; p * p];
70    for k in 0..p {
71        for j in k..p {
72            let mut s = 0.0;
73            for i in 0..n {
74                s += x[(i, k)] * x[(i, j)];
75            }
76            xtx[k * p + j] = s;
77            xtx[j * p + k] = s;
78        }
79    }
80    xtx
81}
82
83// ---------------------------------------------------------------------------
84// Result types
85// ---------------------------------------------------------------------------
86
/// Result of function-on-scalar regression.
///
/// Produced by [`fosr`]. `beta` and `beta_se` exclude the intercept row,
/// which is reported separately in `intercept`. Marked `#[non_exhaustive]`
/// so fields can be added without a breaking change.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct FosrResult {
    /// Intercept function μ(t) (length m)
    pub intercept: Vec<f64>,
    /// Coefficient functions β_j(t), one per predictor (p × m matrix, row j = βⱼ(t))
    pub beta: FdMatrix,
    /// Fitted functional values (n × m matrix)
    pub fitted: FdMatrix,
    /// Residual functions (n × m matrix)
    pub residuals: FdMatrix,
    /// Pointwise R² across the domain (length m)
    pub r_squared_t: Vec<f64>,
    /// Global R² (integrated)
    pub r_squared: f64,
    /// Pointwise standard errors for each βⱼ(t) (p × m matrix)
    pub beta_se: FdMatrix,
    /// Smoothing parameter λ used
    pub lambda: f64,
    /// GCV value
    pub gcv: f64,
}
110
/// Result of FPC-based function-on-scalar regression.
///
/// Produced by [`fosr_fpc`]. Unlike [`FosrResult`], the coefficient
/// functions are reconstructed from FPC loadings, and the score-space
/// regression coefficients are exposed in `beta_scores`.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct FosrFpcResult {
    /// Intercept function μ(t) (length m)
    pub intercept: Vec<f64>,
    /// Coefficient functions β_j(t), one per predictor (p × m matrix, row j = βⱼ(t))
    pub beta: FdMatrix,
    /// Fitted functional values (n × m matrix)
    pub fitted: FdMatrix,
    /// Residual functions (n × m matrix)
    pub residuals: FdMatrix,
    /// Pointwise R² across the domain (length m)
    pub r_squared_t: Vec<f64>,
    /// Global R² (integrated)
    pub r_squared: f64,
    /// FPC-space regression coefficients gamma\[j\]\[k\] (one `Vec<f64>` per predictor)
    pub beta_scores: Vec<Vec<f64>>,
    /// Number of FPC components used (may be fewer than requested)
    pub ncomp: usize,
}
132
/// Result of functional ANOVA.
///
/// Produced by [`fanova`]. Row g of `group_means` corresponds to the label
/// `group_labels[g]`.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct FanovaResult {
    /// Group mean functions (k × m matrix, row g = mean curve of group g)
    pub group_means: FdMatrix,
    /// Overall mean function (length m)
    pub overall_mean: Vec<f64>,
    /// Pointwise F-statistic across the domain (length m)
    pub f_statistic_t: Vec<f64>,
    /// Global test statistic (integrated F)
    pub global_statistic: f64,
    /// P-value from permutation test (add-one smoothed, never exactly 0)
    pub p_value: f64,
    /// Number of permutations performed
    pub n_perm: usize,
    /// Number of groups
    pub n_groups: usize,
    /// Group labels (sorted unique values)
    pub group_labels: Vec<usize>,
}
154
155// ---------------------------------------------------------------------------
156// Shared helpers
157// ---------------------------------------------------------------------------
158
/// Build the second-order difference penalty matrix D'D (m×m, flat
/// row-major), where D is the (m-2)×m second-difference operator.
/// For m < 3 there are no second differences and the penalty is zero.
pub(crate) fn penalty_matrix(m: usize) -> Vec<f64> {
    let mut dtd = vec![0.0; m * m];
    if m < 3 {
        return dtd;
    }
    // Each row of D carries the stencil [1, -2, 1] at columns i, i+1, i+2;
    // D'D accumulates the outer product of every stencil with itself.
    for i in 0..m - 2 {
        let stencil = [(i, 1.0), (i + 1, -2.0), (i + 2, 1.0)];
        for &(row, w_row) in &stencil {
            for &(col, w_col) in &stencil {
                dtd[row * m + col] += w_row * w_col;
            }
        }
    }
    dtd
}
178
179/// Solve (A + λP)x = b for each column of B (pointwise regression at each t).
180/// A is X'X (p×p), B is X'Y (p×m), P is penalty matrix (p×p).
181/// Returns coefficient matrix (p×m).
182fn penalized_solve(
183    xtx: &[f64],
184    xty: &FdMatrix,
185    penalty: &[f64],
186    lambda: f64,
187) -> Result<FdMatrix, FdarError> {
188    let p = xty.nrows();
189    let m = xty.ncols();
190
191    // Build (X'X + λP)
192    let mut a = vec![0.0; p * p];
193    for i in 0..p * p {
194        a[i] = xtx[i] + lambda * penalty[i];
195    }
196
197    // Cholesky factor
198    let l = cholesky_factor(&a, p).ok_or_else(|| FdarError::ComputationFailed {
199        operation: "penalized_solve",
200        detail: format!(
201            "Cholesky factorization of (X'X + {lambda:.4}*P) failed; matrix is singular — try increasing lambda or removing collinear basis columns"
202        ),
203    })?;
204
205    // Solve for each grid point
206    let mut beta = FdMatrix::zeros(p, m);
207    for t in 0..m {
208        let b: Vec<f64> = (0..p).map(|j| xty[(j, t)]).collect();
209        let x = cholesky_forward_back(&l, &b, p);
210        for j in 0..p {
211            beta[(j, t)] = x[j];
212        }
213    }
214    Ok(beta)
215}
216
217/// Compute pointwise R² at each grid point.
218pub(crate) fn pointwise_r_squared(data: &FdMatrix, fitted: &FdMatrix) -> Vec<f64> {
219    let (n, m) = data.shape();
220    (0..m)
221        .map(|t| {
222            let mean_t: f64 = (0..n).map(|i| data[(i, t)]).sum::<f64>() / n as f64;
223            let ss_tot: f64 = (0..n).map(|i| (data[(i, t)] - mean_t).powi(2)).sum();
224            let ss_res: f64 = (0..n)
225                .map(|i| (data[(i, t)] - fitted[(i, t)]).powi(2))
226                .sum();
227            if ss_tot > 1e-15 {
228                1.0 - ss_res / ss_tot
229            } else {
230                0.0
231            }
232        })
233        .collect()
234}
235
236/// GCV for penalized regression: (1/nm) Σ_{i,t} (r_{it} / (1 - tr(H)/n))²
237fn compute_fosr_gcv(residuals: &FdMatrix, trace_h: f64) -> f64 {
238    let (n, m) = residuals.shape();
239    let denom = (1.0 - trace_h / n as f64).max(1e-10);
240    let ss_res: f64 = (0..n)
241        .flat_map(|i| (0..m).map(move |t| residuals[(i, t)].powi(2)))
242        .sum();
243    ss_res / (n as f64 * m as f64 * denom * denom)
244}
245
246// ---------------------------------------------------------------------------
247// fosr: Function-on-scalar regression
248// ---------------------------------------------------------------------------
249
250/// Penalized function-on-scalar regression.
251///
252/// Fits pointwise OLS at each grid point t, then smooths the coefficient
253/// functions β_j(t) using a second-order roughness penalty.
254///
255/// # Arguments
256/// * `data` - Functional response matrix (n × m)
257/// * `predictors` - Scalar predictor matrix (n × p)
258/// * `lambda` - Smoothing parameter (0 for no smoothing, negative for GCV selection)
259///
260/// # Returns
261/// [`FosrResult`] with coefficient functions, fitted values, and diagnostics
262///
263/// # Errors
264///
265/// Returns [`FdarError::InvalidDimension`] if `data` has zero columns,
266/// `predictors` row count does not match `data`, or `n < p + 2`.
267/// Returns [`FdarError::ComputationFailed`] if the penalized Cholesky solve
268/// is singular.
269/// Build design matrix with intercept: \[1, z_1, ..., z_p\].
270pub(crate) fn build_fosr_design(predictors: &FdMatrix, n: usize) -> FdMatrix {
271    let p = predictors.ncols();
272    let p_total = p + 1;
273    let mut design = FdMatrix::zeros(n, p_total);
274    for i in 0..n {
275        design[(i, 0)] = 1.0;
276        for j in 0..p {
277            design[(i, 1 + j)] = predictors[(i, j)];
278        }
279    }
280    design
281}
282
283/// Compute X'Y (p_total × m).
284pub(crate) fn compute_xty_matrix(design: &FdMatrix, data: &FdMatrix) -> FdMatrix {
285    let (n, m) = data.shape();
286    let p_total = design.ncols();
287    let mut xty = FdMatrix::zeros(p_total, m);
288    for j in 0..p_total {
289        for t in 0..m {
290            let mut s = 0.0;
291            for i in 0..n {
292                s += design[(i, j)] * data[(i, t)];
293            }
294            xty[(j, t)] = s;
295        }
296    }
297    xty
298}
299
300/// Extract rows 1..p+1 from a (p+1)×m matrix, dropping the intercept row.
301fn drop_intercept_rows(full: &FdMatrix, p: usize, m: usize) -> FdMatrix {
302    let mut out = FdMatrix::zeros(p, m);
303    for j in 0..p {
304        for t in 0..m {
305            out[(j, t)] = full[(j + 1, t)];
306        }
307    }
308    out
309}
310
311/// Penalized function-on-scalar regression.
312///
313/// Fits the model `X_i(t) = mu(t) + sum_j beta_j(t) * z_ij + epsilon_i(t)`
314/// using pointwise OLS with second-order roughness penalty smoothing.
315///
316/// # Arguments
317/// * `data` - Functional response matrix (n x m)
318/// * `predictors` - Scalar predictor matrix (n x p)
319/// * `lambda` - Roughness penalty (negative for automatic GCV selection)
320///
321/// # Examples
322///
323/// ```
324/// use fdars_core::matrix::FdMatrix;
325/// use fdars_core::function_on_scalar::fosr;
326///
327/// // 10 curves at 15 grid points, 2 scalar predictors
328/// let data = FdMatrix::from_column_major(
329///     (0..150).map(|i| (i as f64 * 0.1).sin()).collect(),
330///     10, 15,
331/// ).unwrap();
332/// let predictors = FdMatrix::from_column_major(
333///     (0..20).map(|i| i as f64 / 19.0).collect(),
334///     10, 2,
335/// ).unwrap();
336/// let result = fosr(&data, &predictors, 0.1).unwrap();
337/// assert_eq!(result.fitted.shape(), (10, 15));
338/// assert_eq!(result.intercept.len(), 15);
339/// ```
340#[must_use = "expensive computation whose result should not be discarded"]
341pub fn fosr(data: &FdMatrix, predictors: &FdMatrix, lambda: f64) -> Result<FosrResult, FdarError> {
342    let (n, m) = data.shape();
343    let p = predictors.ncols();
344    if m == 0 {
345        return Err(FdarError::InvalidDimension {
346            parameter: "data",
347            expected: "at least 1 column (grid points)".to_string(),
348            actual: "0 columns".to_string(),
349        });
350    }
351    if predictors.nrows() != n {
352        return Err(FdarError::InvalidDimension {
353            parameter: "predictors",
354            expected: format!("{n} rows (matching data)"),
355            actual: format!("{} rows", predictors.nrows()),
356        });
357    }
358    if n < p + 2 {
359        return Err(FdarError::InvalidDimension {
360            parameter: "data",
361            expected: format!("at least {} observations (p + 2)", p + 2),
362            actual: format!("{n} observations"),
363        });
364    }
365
366    let design = build_fosr_design(predictors, n);
367    let p_total = design.ncols();
368    let xtx = compute_xtx(&design);
369    let xty = compute_xty_matrix(&design, data);
370    let penalty = penalty_matrix(p_total);
371
372    let lambda = if lambda < 0.0 {
373        select_lambda_gcv(&xtx, &xty, &penalty, data, &design)
374    } else {
375        lambda
376    };
377
378    let beta = penalized_solve(&xtx, &xty, &penalty, lambda)?;
379    let (fitted, residuals) = compute_fosr_fitted(&design, &beta, data);
380
381    let r_squared_t = pointwise_r_squared(data, &fitted);
382    let r_squared = r_squared_t.iter().sum::<f64>() / m as f64;
383    let beta_se = compute_beta_se(&xtx, &penalty, lambda, &residuals, p_total, n);
384    let trace_h = compute_trace_hat(&xtx, &penalty, lambda, p_total, n);
385    let gcv = compute_fosr_gcv(&residuals, trace_h);
386
387    let intercept: Vec<f64> = (0..m).map(|t| beta[(0, t)]).collect();
388
389    Ok(FosrResult {
390        intercept,
391        beta: drop_intercept_rows(&beta, p, m),
392        fitted,
393        residuals,
394        r_squared_t,
395        r_squared,
396        beta_se: drop_intercept_rows(&beta_se, p, m),
397        lambda,
398        gcv,
399    })
400}
401
/// Compute fitted values Ŷ = X β and residuals.
///
/// Rows are computed independently — in parallel when the `parallel`
/// feature is enabled, via `iter_maybe_parallel!` — and then gathered
/// back into dense matrices in observation order.
fn compute_fosr_fitted(
    design: &FdMatrix,
    beta: &FdMatrix,
    data: &FdMatrix,
) -> (FdMatrix, FdMatrix) {
    let (n, m) = data.shape();
    let p_total = design.ncols();
    // One (fitted_row, residual_row) pair per observation.
    let rows: Vec<(Vec<f64>, Vec<f64>)> = iter_maybe_parallel!(0..n)
        .map(|i| {
            let mut fitted_row = vec![0.0; m];
            let mut resid_row = vec![0.0; m];
            for t in 0..m {
                // ŷ_i(t) = Σ_j x_ij · β_j(t)
                let mut yhat = 0.0;
                for j in 0..p_total {
                    yhat += design[(i, j)] * beta[(j, t)];
                }
                fitted_row[t] = yhat;
                resid_row[t] = data[(i, t)] - yhat;
            }
            (fitted_row, resid_row)
        })
        .collect();
    // Scatter the per-row results into the output matrices.
    let mut fitted = FdMatrix::zeros(n, m);
    let mut residuals = FdMatrix::zeros(n, m);
    for (i, (fr, rr)) in rows.into_iter().enumerate() {
        for t in 0..m {
            fitted[(i, t)] = fr[t];
            residuals[(i, t)] = rr[t];
        }
    }
    (fitted, residuals)
}
435
436/// Select smoothing parameter λ via GCV on a grid.
437fn select_lambda_gcv(
438    xtx: &[f64],
439    xty: &FdMatrix,
440    penalty: &[f64],
441    data: &FdMatrix,
442    design: &FdMatrix,
443) -> f64 {
444    let lambdas = [0.0, 1e-6, 1e-4, 1e-2, 0.1, 1.0, 10.0, 100.0, 1000.0];
445    let p_total = design.ncols();
446    let n = design.nrows();
447
448    let mut best_lambda = 0.0;
449    let mut best_gcv = f64::INFINITY;
450
451    for &lam in &lambdas {
452        let Ok(beta) = penalized_solve(xtx, xty, penalty, lam) else {
453            continue;
454        };
455        let (_, residuals) = compute_fosr_fitted(design, &beta, data);
456        let trace_h = compute_trace_hat(xtx, penalty, lam, p_total, n);
457        let gcv = compute_fosr_gcv(&residuals, trace_h);
458        if gcv < best_gcv {
459            best_gcv = gcv;
460            best_lambda = lam;
461        }
462    }
463    best_lambda
464}
465
466/// Compute trace of hat matrix: tr(H) = tr(X (X'X + λP)^{-1} X') = Σ_j h_jj.
467fn compute_trace_hat(xtx: &[f64], penalty: &[f64], lambda: f64, p: usize, n: usize) -> f64 {
468    let mut a = vec![0.0; p * p];
469    for i in 0..p * p {
470        a[i] = xtx[i] + lambda * penalty[i];
471    }
472    // tr(H) = tr(X A^{-1} X') = Σ_{j=0..p} a^{-1}_{jj} * xtx_{jj}
473    // More precisely: tr(X (X'X+λP)^{-1} X') = tr((X'X+λP)^{-1} X'X)
474    let Some(l) = cholesky_factor(&a, p) else {
475        return p as f64; // fallback
476    };
477
478    // Compute A^{-1} X'X via solving A Z = X'X column by column, then trace
479    let mut trace = 0.0;
480    for j in 0..p {
481        let col: Vec<f64> = (0..p).map(|i| xtx[i * p + j]).collect();
482        let z = cholesky_forward_back(&l, &col, p);
483        trace += z[j]; // diagonal element of A^{-1} X'X
484    }
485    trace.min(n as f64)
486}
487
488/// Compute pointwise standard errors for β(t).
489fn compute_beta_se(
490    xtx: &[f64],
491    penalty: &[f64],
492    lambda: f64,
493    residuals: &FdMatrix,
494    p: usize,
495    n: usize,
496) -> FdMatrix {
497    let m = residuals.ncols();
498    let mut a = vec![0.0; p * p];
499    for i in 0..p * p {
500        a[i] = xtx[i] + lambda * penalty[i];
501    }
502    let Some(l) = cholesky_factor(&a, p) else {
503        return FdMatrix::zeros(p, m);
504    };
505
506    // Diagonal of A^{-1}
507    let a_inv_diag: Vec<f64> = (0..p)
508        .map(|j| {
509            let mut ej = vec![0.0; p];
510            ej[j] = 1.0;
511            let v = cholesky_forward_back(&l, &ej, p);
512            v[j]
513        })
514        .collect();
515
516    let df = (n - p).max(1) as f64;
517    let mut se = FdMatrix::zeros(p, m);
518    for t in 0..m {
519        let sigma2_t: f64 = (0..n).map(|i| residuals[(i, t)].powi(2)).sum::<f64>() / df;
520        for j in 0..p {
521            se[(j, t)] = (sigma2_t * a_inv_diag[j]).max(0.0).sqrt();
522        }
523    }
524    se
525}
526
527// ---------------------------------------------------------------------------
528// fosr_fpc: FPC-based function-on-scalar regression (matches R's fda.usc approach)
529// ---------------------------------------------------------------------------
530
531/// OLS regression of each FPC score on the design matrix.
532///
533/// Returns gamma_all\[comp\]\[coef\] = (X'X)^{-1} X' scores\[:,comp\].
534fn regress_scores_on_design(
535    design: &FdMatrix,
536    scores: &FdMatrix,
537    n: usize,
538    k: usize,
539    p_total: usize,
540) -> Result<Vec<Vec<f64>>, FdarError> {
541    let xtx = compute_xtx(design);
542    let l = cholesky_factor(&xtx, p_total).ok_or_else(|| FdarError::ComputationFailed {
543        operation: "regress_scores_on_design",
544        detail: "Cholesky factorization of X'X failed; design matrix is rank-deficient — remove constant or collinear predictors, or add regularization".to_string(),
545    })?;
546
547    let gamma_all: Vec<Vec<f64>> = (0..k)
548        .map(|comp| {
549            let mut xts = vec![0.0; p_total];
550            for j in 0..p_total {
551                for i in 0..n {
552                    xts[j] += design[(i, j)] * scores[(i, comp)];
553                }
554            }
555            cholesky_forward_back(&l, &xts, p_total)
556        })
557        .collect();
558    Ok(gamma_all)
559}
560
561/// Reconstruct β_j(t) = Σ_k gamma\[comp\]\[1+j\] · φ_k(t) for each predictor j.
562fn reconstruct_beta_fpc(
563    gamma_all: &[Vec<f64>],
564    rotation: &FdMatrix,
565    p: usize,
566    k: usize,
567    m: usize,
568) -> FdMatrix {
569    let mut beta = FdMatrix::zeros(p, m);
570    for j in 0..p {
571        for t in 0..m {
572            let mut val = 0.0;
573            for comp in 0..k {
574                val += gamma_all[comp][1 + j] * rotation[(t, comp)];
575            }
576            beta[(j, t)] = val;
577        }
578    }
579    beta
580}
581
582/// Compute intercept function: μ(t) + Σ_k γ_intercept\[k\] · φ_k(t).
583fn compute_intercept_fpc(
584    mean: &[f64],
585    gamma_all: &[Vec<f64>],
586    rotation: &FdMatrix,
587    k: usize,
588    m: usize,
589) -> Vec<f64> {
590    let mut intercept = mean.to_vec();
591    for t in 0..m {
592        for comp in 0..k {
593            intercept[t] += gamma_all[comp][0] * rotation[(t, comp)];
594        }
595    }
596    intercept
597}
598
/// Extract L²-normalized beta_scores from regression coefficients.
///
/// Coefficients are scaled by sqrt(h), where h = 1/(m-1) is the spacing of
/// an m-point uniform grid (unit spacing when m <= 1).
fn extract_beta_scores(gamma_all: &[Vec<f64>], p: usize, k: usize, m: usize) -> Vec<Vec<f64>> {
    let step = if m > 1 { 1.0 / (m - 1) as f64 } else { 1.0 };
    let scale = step.sqrt();
    let mut scores = Vec::with_capacity(p);
    for j in 0..p {
        // Offset 1 + j skips the intercept coefficient at index 0.
        let row: Vec<f64> = (0..k).map(|comp| gamma_all[comp][1 + j] * scale).collect();
        scores.push(row);
    }
    scores
}
611
612/// FPC-based function-on-scalar regression.
613///
614/// Reduces the functional response to FPC scores, regresses each score on the
615/// scalar predictors via OLS, then reconstructs β(t) from the loadings.
616/// This matches R's `fdata2pc` + `lm(scores ~ x)` approach.
617///
618/// # Arguments
619/// * `data` - Functional response matrix (n × m)
620/// * `predictors` - Scalar predictor matrix (n × p)
621/// * `ncomp` - Number of FPC components to use
622///
623/// # Errors
624///
625/// Returns [`FdarError::InvalidDimension`] if `data` has zero columns,
626/// `predictors` row count does not match `data`, or `n < p + 2`.
627/// Returns [`FdarError::InvalidParameter`] if `ncomp` is zero.
628/// Returns [`FdarError::ComputationFailed`] if FPCA fails or the OLS
629/// Cholesky factorization of X'X is singular.
630#[must_use = "expensive computation whose result should not be discarded"]
631pub fn fosr_fpc(
632    data: &FdMatrix,
633    predictors: &FdMatrix,
634    ncomp: usize,
635) -> Result<FosrFpcResult, FdarError> {
636    let (n, m) = data.shape();
637    let p = predictors.ncols();
638    if m == 0 {
639        return Err(FdarError::InvalidDimension {
640            parameter: "data",
641            expected: "at least 1 column (grid points)".to_string(),
642            actual: "0 columns".to_string(),
643        });
644    }
645    if predictors.nrows() != n {
646        return Err(FdarError::InvalidDimension {
647            parameter: "predictors",
648            expected: format!("{n} rows (matching data)"),
649            actual: format!("{} rows", predictors.nrows()),
650        });
651    }
652    if n < p + 2 {
653        return Err(FdarError::InvalidDimension {
654            parameter: "data",
655            expected: format!("at least {} observations (p + 2)", p + 2),
656            actual: format!("{n} observations"),
657        });
658    }
659    if ncomp == 0 {
660        return Err(FdarError::InvalidParameter {
661            parameter: "ncomp",
662            message: "number of FPC components must be at least 1".to_string(),
663        });
664    }
665
666    let fpca = fdata_to_pc_1d(data, ncomp)?;
667    let k = fpca.scores.ncols();
668    let p_total = p + 1;
669    let design = build_fosr_design(predictors, n);
670
671    let gamma_all = regress_scores_on_design(&design, &fpca.scores, n, k, p_total)?;
672    let beta = reconstruct_beta_fpc(&gamma_all, &fpca.rotation, p, k, m);
673    let intercept = compute_intercept_fpc(&fpca.mean, &gamma_all, &fpca.rotation, k, m);
674
675    let (fitted, residuals) = compute_fosr_fpc_fitted(data, &intercept, &beta, predictors);
676    let r_squared_t = pointwise_r_squared(data, &fitted);
677    let r_squared = r_squared_t.iter().sum::<f64>() / m as f64;
678    let beta_scores = extract_beta_scores(&gamma_all, p, k, m);
679
680    Ok(FosrFpcResult {
681        intercept,
682        beta,
683        fitted,
684        residuals,
685        r_squared_t,
686        r_squared,
687        beta_scores,
688        ncomp: k,
689    })
690}
691
692/// Compute fitted values and residuals for FPC-based FOSR.
693fn compute_fosr_fpc_fitted(
694    data: &FdMatrix,
695    intercept: &[f64],
696    beta: &FdMatrix,
697    predictors: &FdMatrix,
698) -> (FdMatrix, FdMatrix) {
699    let (n, m) = data.shape();
700    let p = predictors.ncols();
701    let mut fitted = FdMatrix::zeros(n, m);
702    let mut residuals = FdMatrix::zeros(n, m);
703    for i in 0..n {
704        for t in 0..m {
705            let mut yhat = intercept[t];
706            for j in 0..p {
707                yhat += predictors[(i, j)] * beta[(j, t)];
708            }
709            fitted[(i, t)] = yhat;
710            residuals[(i, t)] = data[(i, t)] - yhat;
711        }
712    }
713    (fitted, residuals)
714}
715
716/// Predict new functional responses from a fitted FOSR model.
717///
718/// # Arguments
719/// * `result` - Fitted [`FosrResult`]
720/// * `new_predictors` - New scalar predictors (n_new × p)
721#[must_use = "prediction result should not be discarded"]
722pub fn predict_fosr(result: &FosrResult, new_predictors: &FdMatrix) -> FdMatrix {
723    let n_new = new_predictors.nrows();
724    let m = result.intercept.len();
725    let p = result.beta.nrows();
726
727    let mut predicted = FdMatrix::zeros(n_new, m);
728    for i in 0..n_new {
729        for t in 0..m {
730            let mut yhat = result.intercept[t];
731            for j in 0..p {
732                yhat += new_predictors[(i, j)] * result.beta[(j, t)];
733            }
734            predicted[(i, t)] = yhat;
735        }
736    }
737    predicted
738}
739
740// ---------------------------------------------------------------------------
741// fanova: Functional ANOVA
742// ---------------------------------------------------------------------------
743
744/// Compute group means and overall mean.
745fn compute_group_means(
746    data: &FdMatrix,
747    groups: &[usize],
748    labels: &[usize],
749) -> (FdMatrix, Vec<f64>) {
750    let (n, m) = data.shape();
751    let k = labels.len();
752    let mut group_means = FdMatrix::zeros(k, m);
753    let mut counts = vec![0usize; k];
754
755    for i in 0..n {
756        let g = labels.iter().position(|&l| l == groups[i]).unwrap_or(0);
757        counts[g] += 1;
758        for t in 0..m {
759            group_means[(g, t)] += data[(i, t)];
760        }
761    }
762    for g in 0..k {
763        if counts[g] > 0 {
764            for t in 0..m {
765                group_means[(g, t)] /= counts[g] as f64;
766            }
767        }
768    }
769
770    let overall_mean: Vec<f64> = (0..m)
771        .map(|t| (0..n).map(|i| data[(i, t)]).sum::<f64>() / n as f64)
772        .collect();
773
774    (group_means, overall_mean)
775}
776
777/// Compute pointwise F-statistic.
778fn pointwise_f_statistic(
779    data: &FdMatrix,
780    groups: &[usize],
781    labels: &[usize],
782    group_means: &FdMatrix,
783    overall_mean: &[f64],
784) -> Vec<f64> {
785    let (n, m) = data.shape();
786    let k = labels.len();
787    let mut counts = vec![0usize; k];
788    for &g in groups {
789        let idx = labels.iter().position(|&l| l == g).unwrap_or(0);
790        counts[idx] += 1;
791    }
792
793    (0..m)
794        .map(|t| {
795            let ss_between: f64 = (0..k)
796                .map(|g| counts[g] as f64 * (group_means[(g, t)] - overall_mean[t]).powi(2))
797                .sum();
798            let ss_within: f64 = (0..n)
799                .map(|i| {
800                    let g = labels.iter().position(|&l| l == groups[i]).unwrap_or(0);
801                    (data[(i, t)] - group_means[(g, t)]).powi(2)
802                })
803                .sum();
804            let ms_between = ss_between / (k as f64 - 1.0).max(1.0);
805            let ms_within = ss_within / (n as f64 - k as f64).max(1.0);
806            if ms_within > 1e-15 {
807                ms_between / ms_within
808            } else {
809                0.0
810            }
811        })
812        .collect()
813}
814
/// Compute the global test statistic: the F-statistic averaged (integrated)
/// over the grid. Returns 0.0 for an empty input instead of NaN from 0/0.
fn global_f_statistic(f_t: &[f64]) -> f64 {
    if f_t.is_empty() {
        return 0.0;
    }
    f_t.iter().sum::<f64>() / f_t.len() as f64
}
819
/// Functional ANOVA: test whether groups have different mean curves.
///
/// Uses a permutation-based global test with the integrated F-statistic.
///
/// # Arguments
/// * `data` - Functional response matrix (n × m)
/// * `groups` - Group labels for each observation (length n, integer-coded)
/// * `n_perm` - Number of permutations for the global test
///
/// # Returns
/// [`FanovaResult`] with group means, F-statistics, and permutation p-value
///
/// # Errors
///
/// Returns [`FdarError::InvalidDimension`] if `data` has zero columns,
/// `groups.len()` does not match the number of rows in `data`, or `n < 3`.
/// Returns [`FdarError::InvalidParameter`] if fewer than 2 distinct groups
/// are present.
#[must_use = "expensive computation whose result should not be discarded"]
pub fn fanova(data: &FdMatrix, groups: &[usize], n_perm: usize) -> Result<FanovaResult, FdarError> {
    let (n, m) = data.shape();
    if m == 0 {
        return Err(FdarError::InvalidDimension {
            parameter: "data",
            expected: "at least 1 column (grid points)".to_string(),
            actual: "0 columns".to_string(),
        });
    }
    if groups.len() != n {
        return Err(FdarError::InvalidDimension {
            parameter: "groups",
            expected: format!("{n} elements (matching data rows)"),
            actual: format!("{} elements", groups.len()),
        });
    }
    if n < 3 {
        return Err(FdarError::InvalidDimension {
            parameter: "data",
            expected: "at least 3 observations".to_string(),
            actual: format!("{n} observations"),
        });
    }

    // Distinct group codes, sorted ascending — the row order of
    // `group_means` and of `group_labels` in the result follows this.
    let mut labels: Vec<usize> = groups.to_vec();
    labels.sort_unstable();
    labels.dedup();
    let n_groups = labels.len();
    if n_groups < 2 {
        return Err(FdarError::InvalidParameter {
            parameter: "groups",
            message: format!("at least 2 distinct groups required, but only {n_groups} found"),
        });
    }

    // Observed statistic on the original labeling.
    let (group_means, overall_mean) = compute_group_means(data, groups, &labels);
    let f_t = pointwise_f_statistic(data, groups, &labels, &group_means, &overall_mean);
    let observed_stat = global_f_statistic(&f_t);

    // Permutation test
    let n_perm = n_perm.max(1);
    let mut n_ge = 0usize;
    // Re-shuffled in place each round; shuffling a shuffle is still a
    // uniform permutation under the null of exchangeable labels.
    let mut perm_groups = groups.to_vec();

    // Simple LCG for reproducibility without requiring rand
    // (fixed seed 42, so results are deterministic across runs).
    let mut rng_state: u64 = 42;
    for _ in 0..n_perm {
        // Fisher-Yates shuffle with LCG
        for i in (1..n).rev() {
            rng_state = rng_state
                .wrapping_mul(6_364_136_223_846_793_005)
                .wrapping_add(1);
            // Use the high bits of the state; NOTE(review): `% (i + 1)` has
            // slight modulo bias — presumably acceptable for a permutation
            // test, confirm if exactness matters.
            let j = (rng_state >> 33) as usize % (i + 1);
            perm_groups.swap(i, j);
        }

        let (perm_means, perm_overall) = compute_group_means(data, &perm_groups, &labels);
        let perm_f = pointwise_f_statistic(data, &perm_groups, &labels, &perm_means, &perm_overall);
        let perm_stat = global_f_statistic(&perm_f);
        if perm_stat >= observed_stat {
            n_ge += 1;
        }
    }

    // Add-one smoothing: the p-value can never be exactly zero.
    let p_value = (n_ge as f64 + 1.0) / (n_perm as f64 + 1.0);

    Ok(FanovaResult {
        group_means,
        overall_mean,
        f_statistic_t: f_t,
        global_statistic: observed_stat,
        p_value,
        n_perm,
        n_groups,
        group_labels: labels,
    })
}
916
impl FosrResult {
    /// Predict functional responses for new predictors. Delegates to [`predict_fosr`].
    ///
    /// `new_predictors` is n_new × p; see [`predict_fosr`] for details.
    pub fn predict(&self, new_predictors: &FdMatrix) -> FdMatrix {
        predict_fosr(self, new_predictors)
    }
}
923
924// ---------------------------------------------------------------------------
925// Tests
926// ---------------------------------------------------------------------------
927
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_helpers::uniform_grid;
    use std::f64::consts::PI;

    /// Synthetic FOSR data: `n` curves on an `m`-point grid driven by two
    /// scalar predictors — a continuous "age" and a binary "group" — with
    /// known coefficient functions plus a small deterministic noise term.
    fn generate_fosr_data(n: usize, m: usize) -> (FdMatrix, FdMatrix) {
        let grid = uniform_grid(m);
        let mut curves = FdMatrix::zeros(n, m);
        let mut design = FdMatrix::zeros(n, 2);

        for i in 0..n {
            let age = (i as f64) / (n as f64);
            let group = (1 - i % 2) as f64; // 1.0 on even rows, 0.0 on odd
            design[(i, 0)] = age;
            design[(i, 1)] = group;
            for j in 0..m {
                // True model: μ(t) + age · β₁(t) + group · β₂(t) + noise
                let mu = (2.0 * PI * grid[j]).sin();
                let beta_age = grid[j]; // linear coefficient for age
                let beta_group = (4.0 * PI * grid[j]).cos(); // oscillating for group
                let noise = 0.05 * ((i * 13 + j * 7) % 100) as f64 / 100.0;
                curves[(i, j)] = mu + age * beta_age + group * beta_group + noise;
            }
        }
        (curves, design)
    }

    // ----- FOSR tests -----

    #[test]
    fn test_fosr_basic() {
        let (curves, design) = generate_fosr_data(30, 50);
        let result = fosr(&curves, &design, 0.0);
        assert!(result.is_ok());
        let model = result.unwrap();
        assert_eq!(model.intercept.len(), 50);
        assert_eq!(model.beta.shape(), (2, 50));
        assert_eq!(model.fitted.shape(), (30, 50));
        assert_eq!(model.residuals.shape(), (30, 50));
        assert!(model.r_squared >= 0.0);
    }

    #[test]
    fn test_fosr_with_penalty() {
        let (curves, design) = generate_fosr_data(30, 50);
        let unpenalized = fosr(&curves, &design, 0.0).unwrap();
        let penalized = fosr(&curves, &design, 1.0).unwrap();
        // Both penalty settings must yield valid coefficient matrices.
        assert_eq!(unpenalized.beta.shape(), (2, 50));
        assert_eq!(penalized.beta.shape(), (2, 50));
    }

    #[test]
    fn test_fosr_auto_lambda() {
        let (curves, design) = generate_fosr_data(30, 50);
        // Negative lambda requests automatic selection of the penalty.
        let model = fosr(&curves, &design, -1.0).unwrap();
        assert!(model.lambda >= 0.0);
    }

    #[test]
    fn test_fosr_fitted_plus_residuals_equals_y() {
        let (curves, design) = generate_fosr_data(30, 50);
        let model = fosr(&curves, &design, 0.0).unwrap();
        for row in 0..30 {
            for col in 0..50 {
                let reconstructed = model.fitted[(row, col)] + model.residuals[(row, col)];
                assert!(
                    (reconstructed - curves[(row, col)]).abs() < 1e-10,
                    "ŷ + r should equal y at ({}, {})",
                    row,
                    col
                );
            }
        }
    }

    #[test]
    fn test_fosr_pointwise_r_squared_valid() {
        let (curves, design) = generate_fosr_data(30, 50);
        let model = fosr(&curves, &design, 0.0).unwrap();
        for &r2 in &model.r_squared_t {
            // Small tolerance below 0 and above 1 to absorb rounding.
            assert!(
                r2 >= -0.01 && r2 <= 1.0 + 1e-10,
                "R²(t) out of range: {}",
                r2
            );
        }
    }

    #[test]
    fn test_fosr_se_positive() {
        let (curves, design) = generate_fosr_data(30, 50);
        let model = fosr(&curves, &design, 0.0).unwrap();
        for coef in 0..2 {
            for col in 0..50 {
                let se = model.beta_se[(coef, col)];
                assert!(
                    se >= 0.0 && se.is_finite(),
                    "SE should be non-negative finite"
                );
            }
        }
    }

    #[test]
    fn test_fosr_invalid_input() {
        // Too few observations / degenerate design must be rejected.
        let curves = FdMatrix::zeros(2, 50);
        let design = FdMatrix::zeros(2, 1);
        assert!(fosr(&curves, &design, 0.0).is_err());
    }

    // ----- predict_fosr tests -----

    #[test]
    fn test_predict_fosr_on_training_data() {
        let (curves, design) = generate_fosr_data(30, 50);
        let model = fosr(&curves, &design, 0.0).unwrap();
        let preds = predict_fosr(&model, &design);
        assert_eq!(preds.shape(), (30, 50));
        for row in 0..30 {
            for col in 0..50 {
                assert!(
                    (preds[(row, col)] - model.fitted[(row, col)]).abs() < 1e-8,
                    "Prediction on training data should match fitted"
                );
            }
        }
    }

    // ----- FANOVA tests -----

    #[test]
    fn test_fanova_two_groups() {
        let (n, m) = (40, 50);
        let grid = uniform_grid(m);

        let mut data = FdMatrix::zeros(n, m);
        let mut groups = vec![0usize; n];
        for (i, label) in groups.iter_mut().enumerate() {
            *label = usize::from(i >= n / 2);
            for j in 0..m {
                let base = (2.0 * PI * grid[j]).sin();
                let effect = if *label == 1 { 0.5 * grid[j] } else { 0.0 };
                data[(i, j)] = base + effect + 0.01 * (i as f64 * 0.1).sin();
            }
        }

        let outcome = fanova(&data, &groups, 200);
        assert!(outcome.is_ok());
        let res = outcome.unwrap();
        assert_eq!(res.n_groups, 2);
        assert_eq!(res.group_means.shape(), (2, m));
        assert_eq!(res.f_statistic_t.len(), m);
        assert!((0.0..=1.0).contains(&res.p_value));
        // With a real group effect, p should be small
        assert!(
            res.p_value < 0.1,
            "Should detect group effect, got p={}",
            res.p_value
        );
    }

    #[test]
    fn test_fanova_no_effect() {
        let (n, m) = (40, 50);
        let grid = uniform_grid(m);

        let mut data = FdMatrix::zeros(n, m);
        let mut groups = vec![0usize; n];
        for (i, label) in groups.iter_mut().enumerate() {
            *label = usize::from(i >= n / 2);
            for j in 0..m {
                // Identical generating process in both groups.
                let noise = 0.1 * ((i * 7 + j * 3) % 100) as f64 / 100.0;
                data[(i, j)] = (2.0 * PI * grid[j]).sin() + noise;
            }
        }

        let outcome = fanova(&data, &groups, 200);
        assert!(outcome.is_ok());
        let res = outcome.unwrap();
        // Without a group effect the permutation p-value should be large.
        assert!(
            res.p_value > 0.05,
            "Should not detect effect, got p={}",
            res.p_value
        );
    }

    #[test]
    fn test_fanova_three_groups() {
        let (n, m) = (30, 50);
        let grid = uniform_grid(m);

        let mut data = FdMatrix::zeros(n, m);
        let mut groups = vec![0usize; n];
        for (i, label) in groups.iter_mut().enumerate() {
            *label = i % 3;
            for j in 0..m {
                // Three distinct mean curves: flat, linear, and cosine-shaped.
                let effect = match i % 3 {
                    0 => 0.0,
                    1 => 0.5 * grid[j],
                    _ => -0.3 * (2.0 * PI * grid[j]).cos(),
                };
                data[(i, j)] = (2.0 * PI * grid[j]).sin() + effect + 0.01 * (i as f64 * 0.1).sin();
            }
        }

        let outcome = fanova(&data, &groups, 200);
        assert!(outcome.is_ok());
        assert_eq!(outcome.unwrap().n_groups, 3);
    }

    #[test]
    fn test_fanova_invalid_input() {
        let data = FdMatrix::zeros(10, 50);

        // A single distinct group label is rejected.
        let one_group = vec![0usize; 10];
        assert!(fanova(&data, &one_group, 100).is_err());

        // A label vector whose length mismatches the data is rejected.
        let wrong_len = vec![0usize; 5];
        assert!(fanova(&data, &wrong_len, 100).is_err());
    }
}