Skip to main content

fdars_core/scalar_on_function/
mod.rs

1//! Scalar-on-function regression with mixed scalar/functional covariates.
2//!
3//! Implements models of the form:
4//! ```text
5//! y = α + ∫β(t)X(t)dt + γᵀz + ε
6//! ```
7//! where X(t) is a functional predictor, z is a vector of scalar covariates,
8//! β(t) is the functional coefficient, and γ is the vector of scalar coefficients.
9//!
10//! # Methods
11//!
12//! - [`fregre_lm`]: FPC-based functional linear model with optional scalar covariates
13//! - [`fregre_l1`]: L1 (median) robust functional regression via IRLS
14//! - [`fregre_huber`]: Huber M-estimation robust functional regression via IRLS
15//! - [`fregre_np_mixed`]: Nonparametric kernel regression with product kernels
16//! - [`functional_logistic`]: Logistic regression for binary outcomes
17//! - [`fregre_cv`]: Cross-validation for number of FPC components
18
19use crate::error::FdarError;
20use crate::matrix::FdMatrix;
21use crate::regression::{FpcaResult, PlsResult};
22
23mod bootstrap;
24mod cv;
25mod fregre_lm;
26mod logistic;
27mod nonparametric;
28mod pls;
29mod robust;
30#[cfg(test)]
31mod tests;
32
33// Re-export all public items from submodules
34pub use bootstrap::{bootstrap_ci_fregre_lm, bootstrap_ci_functional_logistic};
35pub use cv::{fregre_basis_cv, fregre_np_cv};
36pub use fregre_lm::{fregre_cv, fregre_lm, model_selection_ncomp, predict_fregre_lm};
37pub use logistic::{functional_logistic, predict_functional_logistic};
38pub use nonparametric::{
39    fregre_np_from_distances, fregre_np_mixed, predict_fregre_np, predict_fregre_np_from_distances,
40};
41pub use pls::{fregre_pls, predict_fregre_pls};
42pub use robust::{fregre_huber, fregre_l1, predict_fregre_robust};
43
44// ---------------------------------------------------------------------------
45// Result types
46// ---------------------------------------------------------------------------
47
48/// Result of functional linear regression.
49#[derive(Debug, Clone, PartialEq)]
50pub struct FregreLmResult {
51    /// Intercept α
52    pub intercept: f64,
53    /// Functional coefficient β(t), evaluated on the original grid (length m)
54    pub beta_t: Vec<f64>,
55    /// Pointwise standard errors of β(t) (length m)
56    pub beta_se: Vec<f64>,
57    /// Scalar coefficients γ (one per scalar covariate)
58    pub gamma: Vec<f64>,
59    /// Fitted values ŷ (length n)
60    pub fitted_values: Vec<f64>,
61    /// Residuals y - ŷ (length n)
62    pub residuals: Vec<f64>,
63    /// R² statistic
64    pub r_squared: f64,
65    /// Adjusted R²
66    pub r_squared_adj: f64,
67    /// Standard errors of all coefficients (intercept, FPC scores, scalar covariates)
68    pub std_errors: Vec<f64>,
69    /// Number of FPC components used
70    pub ncomp: usize,
71    /// FPCA result (for projecting new data)
72    pub fpca: FpcaResult,
73    /// Regression coefficients on (FPC scores, scalar covariates) — internal
74    pub coefficients: Vec<f64>,
75    /// Residual standard error
76    pub residual_se: f64,
77    /// GCV criterion value (if computed)
78    pub gcv: f64,
79    /// Akaike Information Criterion
80    pub aic: f64,
81    /// Bayesian Information Criterion
82    pub bic: f64,
83}
84
85/// Result of nonparametric functional regression with mixed predictors.
86#[derive(Debug, Clone, PartialEq)]
87pub struct FregreNpResult {
88    /// Fitted values ŷ (length n)
89    pub fitted_values: Vec<f64>,
90    /// Residuals y - ŷ (length n)
91    pub residuals: Vec<f64>,
92    /// R² statistic
93    pub r_squared: f64,
94    /// Bandwidth for functional distance kernel
95    pub h_func: f64,
96    /// Bandwidth for scalar covariates kernel
97    pub h_scalar: f64,
98    /// Leave-one-out CV error
99    pub cv_error: f64,
100}
101
102/// Result of robust (L1 or Huber) functional regression.
103#[derive(Debug, Clone, PartialEq)]
104#[non_exhaustive]
105pub struct FregreRobustResult {
106    /// Intercept
107    pub intercept: f64,
108    /// Functional coefficient β(t), evaluated on the original grid (length m)
109    pub beta_t: Vec<f64>,
110    /// Fitted values ŷ (length n)
111    pub fitted_values: Vec<f64>,
112    /// Residuals y - ŷ (length n)
113    pub residuals: Vec<f64>,
114    /// Regression coefficients (intercept, FPC scores, scalar covariates)
115    pub coefficients: Vec<f64>,
116    /// Number of FPC components used
117    pub ncomp: usize,
118    /// FPCA result (for projecting new data)
119    pub fpca: FpcaResult,
120    /// Number of IRLS iterations performed
121    pub iterations: usize,
122    /// Whether the IRLS algorithm converged
123    pub converged: bool,
124    /// Final IRLS weights (length n)
125    pub weights: Vec<f64>,
126    /// R² statistic
127    pub r_squared: f64,
128}
129
130/// Result of functional logistic regression.
131#[derive(Debug, Clone, PartialEq)]
132pub struct FunctionalLogisticResult {
133    /// Intercept α
134    pub intercept: f64,
135    /// Functional coefficient β(t), evaluated on the original grid (length m)
136    pub beta_t: Vec<f64>,
137    /// Pointwise standard errors of β(t) (length m)
138    pub beta_se: Vec<f64>,
139    /// Scalar coefficients γ (one per scalar covariate)
140    pub gamma: Vec<f64>,
141    /// Predicted probabilities P(Y=1) (length n)
142    pub probabilities: Vec<f64>,
143    /// Predicted class labels (0 or 1)
144    pub predicted_classes: Vec<usize>,
145    /// Number of FPC components used
146    pub ncomp: usize,
147    /// Classification accuracy on training data
148    pub accuracy: f64,
149    /// Standard errors of all coefficients (intercept, FPC scores, scalar covariates)
150    pub std_errors: Vec<f64>,
151    /// Regression coefficients on (FPC scores, scalar covariates) — internal
152    pub coefficients: Vec<f64>,
153    /// Log-likelihood at convergence
154    pub log_likelihood: f64,
155    /// Number of IRLS iterations
156    pub iterations: usize,
157    /// FPCA result (for projecting new data)
158    pub fpca: FpcaResult,
159    /// Akaike Information Criterion
160    pub aic: f64,
161    /// Bayesian Information Criterion
162    pub bic: f64,
163}
164
165/// Result of cross-validation for K selection.
166#[derive(Debug, Clone, PartialEq)]
167#[non_exhaustive]
168pub struct FregreCvResult {
169    /// Candidate K values tested
170    pub k_values: Vec<usize>,
171    /// CV error for each K
172    pub cv_errors: Vec<f64>,
173    /// Optimal K (minimizing CV error)
174    pub optimal_k: usize,
175    /// Minimum CV error
176    pub min_cv_error: f64,
177    /// Out-of-fold predictions at optimal K (length n, each predicted when held out)
178    pub oof_predictions: Vec<f64>,
179    /// Fold assignment for each observation (0..n_folds)
180    pub fold_assignments: Vec<usize>,
181    /// Per-fold MSE at optimal K
182    pub fold_errors: Vec<f64>,
183}
184
185/// Result of PLS-based scalar-on-function regression.
186#[derive(Debug, Clone, PartialEq)]
187#[non_exhaustive]
188pub struct PlsRegressionResult {
189    /// Intercept α
190    pub intercept: f64,
191    /// Functional coefficient β(t), evaluated on the original grid (length m)
192    pub beta_t: Vec<f64>,
193    /// Scalar coefficients γ (one per scalar covariate)
194    pub gamma: Vec<f64>,
195    /// Fitted values ŷ (length n)
196    pub fitted_values: Vec<f64>,
197    /// Residuals y - ŷ (length n)
198    pub residuals: Vec<f64>,
199    /// R² statistic
200    pub r_squared: f64,
201    /// Adjusted R²
202    pub r_squared_adj: f64,
203    /// Number of PLS components used
204    pub ncomp: usize,
205    /// PLS result (for projecting new data)
206    pub pls: PlsResult,
207    /// Regression coefficients on (intercept, PLS scores, scalar covariates)
208    pub coefficients: Vec<f64>,
209    /// Residual standard error
210    pub residual_se: f64,
211    /// Akaike Information Criterion
212    pub aic: f64,
213    /// Bayesian Information Criterion
214    pub bic: f64,
215}
216
217/// Criterion used for model selection.
218#[derive(Debug, Clone, Copy, PartialEq)]
219pub enum SelectionCriterion {
220    /// Akaike Information Criterion
221    Aic,
222    /// Bayesian Information Criterion
223    Bic,
224    /// Generalized Cross-Validation
225    Gcv,
226}
227
228/// Result of ncomp model selection.
229#[derive(Debug, Clone, PartialEq)]
230pub struct ModelSelectionResult {
231    /// Best number of FPC components by the chosen criterion
232    pub best_ncomp: usize,
233    /// (ncomp, AIC, BIC, GCV) for each candidate
234    pub criteria: Vec<(usize, f64, f64, f64)>,
235}
236
237/// Result of bootstrap confidence intervals for β(t).
238#[derive(Debug, Clone, PartialEq)]
239pub struct BootstrapCiResult {
240    /// Pointwise lower bound (length m).
241    pub lower: Vec<f64>,
242    /// Pointwise upper bound (length m).
243    pub upper: Vec<f64>,
244    /// Original β(t) estimate (length m).
245    pub center: Vec<f64>,
246    /// Simultaneous lower bound (sup-norm adjusted, length m).
247    pub sim_lower: Vec<f64>,
248    /// Simultaneous upper bound (sup-norm adjusted, length m).
249    pub sim_upper: Vec<f64>,
250    /// Number of bootstrap replicates that converged.
251    pub n_boot_success: usize,
252}
253
254/// Result of lambda selection for basis regression via cross-validation.
255#[derive(Debug, Clone, PartialEq)]
256pub struct FregreBasisCvResult {
257    /// Optimal smoothing parameter lambda.
258    pub optimal_lambda: f64,
259    /// Mean CV error for each lambda.
260    pub cv_errors: Vec<f64>,
261    /// SE of CV error across folds for each lambda.
262    pub cv_se: Vec<f64>,
263    /// Lambda values tested.
264    pub lambda_values: Vec<f64>,
265    /// Minimum mean CV error.
266    pub min_cv_error: f64,
267}
268
269/// Result of bandwidth selection for nonparametric regression via CV.
270#[derive(Debug, Clone, PartialEq)]
271pub struct FregreNpCvResult {
272    /// Optimal bandwidth.
273    pub optimal_h: f64,
274    /// Mean CV error for each bandwidth.
275    pub cv_errors: Vec<f64>,
276    /// SE of CV error across folds for each bandwidth.
277    pub cv_se: Vec<f64>,
278    /// Bandwidth values tested.
279    pub h_values: Vec<f64>,
280    /// Minimum mean CV error.
281    pub min_cv_error: f64,
282}
283
284// ---------------------------------------------------------------------------
285// Shared linear algebra helpers
286// ---------------------------------------------------------------------------
287
288/// Compute X'X (symmetric, p×p stored flat row-major).
289pub(crate) fn compute_xtx(x: &FdMatrix) -> Vec<f64> {
290    let (n, p) = x.shape();
291    let mut xtx = vec![0.0; p * p];
292    for k in 0..p {
293        for j in k..p {
294            let mut s = 0.0;
295            for i in 0..n {
296                s += x[(i, k)] * x[(i, j)];
297            }
298            xtx[k * p + j] = s;
299            xtx[j * p + k] = s;
300        }
301    }
302    xtx
303}
304
305/// Compute X'y (length p).
306fn compute_xty(x: &FdMatrix, y: &[f64]) -> Vec<f64> {
307    let (n, p) = x.shape();
308    (0..p)
309        .map(|k| {
310            let mut s = 0.0;
311            for i in 0..n {
312                s += x[(i, k)] * y[i];
313            }
314            s
315        })
316        .collect()
317}
318
319/// Cholesky factorization: A = LL'. Returns L (p×p flat row-major) or error if singular.
320pub(crate) fn cholesky_factor(a: &[f64], p: usize) -> Result<Vec<f64>, FdarError> {
321    let mut l = vec![0.0; p * p];
322    for j in 0..p {
323        let mut diag = a[j * p + j];
324        for k in 0..j {
325            diag -= l[j * p + k] * l[j * p + k];
326        }
327        if diag <= 1e-12 {
328            return Err(FdarError::ComputationFailed {
329                operation: "Cholesky factorization",
330                detail: "matrix is singular or near-singular; try reducing ncomp or check for collinear FPC scores".into(),
331            });
332        }
333        l[j * p + j] = diag.sqrt();
334        for i in (j + 1)..p {
335            let mut s = a[i * p + j];
336            for k in 0..j {
337                s -= l[i * p + k] * l[j * p + k];
338            }
339            l[i * p + j] = s / l[j * p + j];
340        }
341    }
342    Ok(l)
343}
344
345/// Solve Lz = b (forward) then L'x = z (back). L is p×p flat row-major.
346pub(crate) fn cholesky_forward_back(l: &[f64], b: &[f64], p: usize) -> Vec<f64> {
347    let mut z = b.to_vec();
348    for j in 0..p {
349        for k in 0..j {
350            z[j] -= l[j * p + k] * z[k];
351        }
352        z[j] /= l[j * p + j];
353    }
354    for j in (0..p).rev() {
355        for k in (j + 1)..p {
356            z[j] -= l[k * p + j] * z[k];
357        }
358        z[j] /= l[j * p + j];
359    }
360    z
361}
362
363/// Solve Ax = b via Cholesky decomposition (A must be symmetric positive definite).
364pub(super) fn cholesky_solve(a: &[f64], b: &[f64], p: usize) -> Result<Vec<f64>, FdarError> {
365    let l = cholesky_factor(a, p)?;
366    Ok(cholesky_forward_back(&l, b, p))
367}
368
369/// Compute hat matrix diagonal: H_ii = x_i' (X'X)^{-1} x_i, given Cholesky factor L of X'X.
370pub(crate) fn compute_hat_diagonal(x: &FdMatrix, l: &[f64]) -> Vec<f64> {
371    let (n, p) = x.shape();
372    let mut hat_diag = vec![0.0; n];
373    for i in 0..n {
374        let mut v = vec![0.0; p];
375        for j in 0..p {
376            v[j] = x[(i, j)];
377            for k in 0..j {
378                v[j] -= l[j * p + k] * v[k];
379            }
380            v[j] /= l[j * p + j];
381        }
382        hat_diag[i] = v.iter().map(|vi| vi * vi).sum();
383    }
384    hat_diag
385}
386
387/// Compute diagonal of (X'X)^{-1} given Cholesky factor L, then SE = sqrt(sigma² * diag).
388fn compute_ols_std_errors(l: &[f64], p: usize, sigma2: f64) -> Vec<f64> {
389    let mut se = vec![0.0; p];
390    for j in 0..p {
391        let mut v = vec![0.0; p];
392        v[j] = 1.0;
393        for k in 0..p {
394            for kk in 0..k {
395                v[k] -= l[k * p + kk] * v[kk];
396            }
397            v[k] /= l[k * p + k];
398        }
399        se[j] = (sigma2 * v.iter().map(|vi| vi * vi).sum::<f64>()).sqrt();
400    }
401    se
402}
403
404// ---------------------------------------------------------------------------
405// Design matrix and coefficient recovery
406// ---------------------------------------------------------------------------
407
408/// Build design matrix: \[1, ξ_1, ..., ξ_K, z_1, ..., z_p\].
409/// Validate inputs for fregre_lm / functional_logistic.
410fn validate_fregre_inputs(
411    n: usize,
412    m: usize,
413    y: &[f64],
414    scalar_covariates: Option<&FdMatrix>,
415) -> Result<(), FdarError> {
416    if n < 3 {
417        return Err(FdarError::InvalidDimension {
418            parameter: "data",
419            expected: "at least 3 rows".to_string(),
420            actual: format!("{n}"),
421        });
422    }
423    if m == 0 {
424        return Err(FdarError::InvalidDimension {
425            parameter: "data",
426            expected: "at least 1 column".to_string(),
427            actual: "0".to_string(),
428        });
429    }
430    if y.len() != n {
431        return Err(FdarError::InvalidDimension {
432            parameter: "y",
433            expected: format!("{n}"),
434            actual: format!("{}", y.len()),
435        });
436    }
437    if let Some(sc) = scalar_covariates {
438        if sc.nrows() != n {
439            return Err(FdarError::InvalidDimension {
440                parameter: "scalar_covariates",
441                expected: format!("{n} rows"),
442                actual: format!("{} rows", sc.nrows()),
443            });
444        }
445    }
446    Ok(())
447}
448
449/// Resolve ncomp: auto-select via CV if 0, otherwise clamp.
450fn resolve_ncomp(
451    ncomp: usize,
452    data: &FdMatrix,
453    y: &[f64],
454    scalar_covariates: Option<&FdMatrix>,
455    n: usize,
456    m: usize,
457) -> Result<usize, FdarError> {
458    if ncomp == 0 {
459        let cv = fregre_cv(data, y, scalar_covariates, 1, m.min(n - 1).min(20), 5)?;
460        Ok(cv.optimal_k)
461    } else {
462        Ok(ncomp.min(n - 1).min(m))
463    }
464}
465
466pub(crate) fn build_design_matrix(
467    fpca_scores: &FdMatrix,
468    ncomp: usize,
469    scalar_covariates: Option<&FdMatrix>,
470    n: usize,
471) -> FdMatrix {
472    let p_scalar = scalar_covariates.map_or(0, super::matrix::FdMatrix::ncols);
473    let p_total = 1 + ncomp + p_scalar;
474    let mut design = FdMatrix::zeros(n, p_total);
475    for i in 0..n {
476        design[(i, 0)] = 1.0;
477        for k in 0..ncomp {
478            design[(i, 1 + k)] = fpca_scores[(i, k)];
479        }
480        if let Some(sc) = scalar_covariates {
481            for j in 0..p_scalar {
482                design[(i, 1 + ncomp + j)] = sc[(i, j)];
483            }
484        }
485    }
486    design
487}
488
489/// Recover functional coefficient β(t) = Σ_k γ_k φ_k(t).
490fn recover_beta_t(fpc_coeffs: &[f64], rotation: &FdMatrix, m: usize) -> Vec<f64> {
491    let ncomp = fpc_coeffs.len();
492    let mut beta_t = vec![0.0; m];
493    for k in 0..ncomp {
494        for j in 0..m {
495            beta_t[j] += fpc_coeffs[k] * rotation[(j, k)];
496        }
497    }
498    beta_t
499}
500
501/// Pointwise standard error of β(t) via error propagation through FPCA rotation.
502///
503/// SE[β(t_j)]² = Σ_k φ_k(t_j)² · SE[γ_k]²
504fn compute_beta_se(gamma_se: &[f64], rotation: &FdMatrix, m: usize) -> Vec<f64> {
505    let ncomp = gamma_se.len();
506    let mut beta_se = vec![0.0; m];
507    for j in 0..m {
508        let mut var_j = 0.0;
509        for k in 0..ncomp {
510            var_j += rotation[(j, k)].powi(2) * gamma_se[k].powi(2);
511        }
512        beta_se[j] = var_j.sqrt();
513    }
514    beta_se
515}
516
517/// Compute fitted values ŷ = X β.
518fn compute_fitted(design: &FdMatrix, coeffs: &[f64]) -> Vec<f64> {
519    let (n, p) = design.shape();
520    (0..n)
521        .map(|i| {
522            let mut yhat = 0.0;
523            for j in 0..p {
524                yhat += design[(i, j)] * coeffs[j];
525            }
526            yhat
527        })
528        .collect()
529}
530
531/// Compute R² and adjusted R².
532fn compute_r_squared(y: &[f64], residuals: &[f64], p_total: usize) -> (f64, f64) {
533    let n = y.len();
534    let y_mean = y.iter().sum::<f64>() / n as f64;
535    let ss_tot: f64 = y.iter().map(|&yi| (yi - y_mean).powi(2)).sum();
536    let ss_res: f64 = residuals.iter().map(|r| r * r).sum();
537    let r_squared = if ss_tot > 0.0 {
538        1.0 - ss_res / ss_tot
539    } else {
540        0.0
541    };
542    let df_model = (p_total - 1) as f64;
543    let r_squared_adj = if n as f64 - df_model - 1.0 > 0.0 {
544        1.0 - (1.0 - r_squared) * (n as f64 - 1.0) / (n as f64 - df_model - 1.0)
545    } else {
546        r_squared
547    };
548    (r_squared, r_squared_adj)
549}
550
551// ---------------------------------------------------------------------------
552// OLS solver
553// ---------------------------------------------------------------------------
554
555/// Solve ordinary least squares: min ||Xb - y||² via normal equations with Cholesky.
556/// Returns (coefficients, hat_matrix_diagonal) or error if singular.
557fn ols_solve(x: &FdMatrix, y: &[f64]) -> Result<(Vec<f64>, Vec<f64>), FdarError> {
558    let (n, p) = x.shape();
559    if n < p || p == 0 {
560        return Err(FdarError::InvalidDimension {
561            parameter: "design matrix",
562            expected: format!("n >= p and p > 0 (p={p})"),
563            actual: format!("n={n}, p={p}"),
564        });
565    }
566    let xtx = compute_xtx(x);
567    let xty = compute_xty(x, y);
568    let l = cholesky_factor(&xtx, p)?;
569    let b = cholesky_forward_back(&l, &xty, p);
570    let hat_diag = compute_hat_diagonal(x, &l);
571    Ok((b, hat_diag))
572}
573
574/// Sigmoid function: 1 / (1 + exp(-x))
575pub(crate) fn sigmoid(x: f64) -> f64 {
576    if x >= 0.0 {
577        1.0 / (1.0 + (-x).exp())
578    } else {
579        let ex = x.exp();
580        ex / (1.0 + ex)
581    }
582}
583
584// ---------------------------------------------------------------------------
585// Predict methods on result structs
586// ---------------------------------------------------------------------------
587
588impl FregreLmResult {
589    /// Predict new responses. Delegates to [`predict_fregre_lm`].
590    pub fn predict(&self, new_data: &FdMatrix, new_scalar: Option<&FdMatrix>) -> Vec<f64> {
591        predict_fregre_lm(self, new_data, new_scalar)
592    }
593}
594
595impl FregreRobustResult {
596    /// Predict new responses. Delegates to [`predict_fregre_robust`].
597    pub fn predict(&self, new_data: &FdMatrix, new_scalar: Option<&FdMatrix>) -> Vec<f64> {
598        predict_fregre_robust(self, new_data, new_scalar)
599    }
600}
601
602impl FunctionalLogisticResult {
603    /// Predict P(Y=1) for new data. Delegates to [`predict_functional_logistic`].
604    pub fn predict(&self, new_data: &FdMatrix, new_scalar: Option<&FdMatrix>) -> Vec<f64> {
605        predict_functional_logistic(self, new_data, new_scalar)
606    }
607}