// fdars_core/scalar_on_function/mod.rs
1//! Scalar-on-function regression with mixed scalar/functional covariates.
2//!
3//! Implements models of the form:
4//! ```text
5//! y = α + ∫β(t)X(t)dt + γᵀz + ε
6//! ```
7//! where X(t) is a functional predictor, z is a vector of scalar covariates,
8//! β(t) is the functional coefficient, and γ is the vector of scalar coefficients.
9//!
10//! # Methods
11//!
12//! - [`fregre_lm`]: FPC-based functional linear model with optional scalar covariates
13//! - [`fregre_np_mixed`]: Nonparametric kernel regression with product kernels
14//! - [`functional_logistic`]: Logistic regression for binary outcomes
15//! - [`fregre_cv`]: Cross-validation for number of FPC components
16
17use crate::cv::create_folds;
18use crate::error::FdarError;
19use crate::iter_maybe_parallel;
20use crate::matrix::FdMatrix;
21use crate::regression::{fdata_to_pc_1d, FpcaResult};
22use rand::prelude::*;
23
24mod bootstrap;
25mod cv;
26mod fregre_lm;
27mod logistic;
28mod nonparametric;
29#[cfg(test)]
30mod tests;
31
32// Re-export all public items from submodules
33pub use bootstrap::{bootstrap_ci_fregre_lm, bootstrap_ci_functional_logistic};
34pub use cv::{fregre_basis_cv, fregre_np_cv};
35pub use fregre_lm::{fregre_cv, fregre_lm, model_selection_ncomp, predict_fregre_lm};
36pub use logistic::{functional_logistic, predict_functional_logistic};
37pub use nonparametric::{fregre_np_mixed, predict_fregre_np};
38
39// ---------------------------------------------------------------------------
40// Result types
41// ---------------------------------------------------------------------------
42
/// Result of functional linear regression.
///
/// Produced by [`fregre_lm`]; score new observations with
/// [`FregreLmResult::predict`]. Vector lengths refer to n curves, m grid
/// points, and K = `ncomp` FPC components.
#[derive(Debug, Clone, PartialEq)]
pub struct FregreLmResult {
    /// Intercept α
    pub intercept: f64,
    /// Functional coefficient β(t), evaluated on the original grid (length m)
    pub beta_t: Vec<f64>,
    /// Pointwise standard errors of β(t) (length m)
    pub beta_se: Vec<f64>,
    /// Scalar coefficients γ (one per scalar covariate)
    pub gamma: Vec<f64>,
    /// Fitted values ŷ (length n)
    pub fitted_values: Vec<f64>,
    /// Residuals y - ŷ (length n)
    pub residuals: Vec<f64>,
    /// R² statistic
    pub r_squared: f64,
    /// Adjusted R²
    pub r_squared_adj: f64,
    /// Standard errors of all coefficients (intercept, FPC scores, scalar covariates)
    pub std_errors: Vec<f64>,
    /// Number of FPC components used
    pub ncomp: usize,
    /// FPCA result (for projecting new data)
    pub fpca: FpcaResult,
    /// Regression coefficients on (FPC scores, scalar covariates) — internal
    pub coefficients: Vec<f64>,
    /// Residual standard error
    pub residual_se: f64,
    /// GCV criterion value (if computed)
    pub gcv: f64,
    /// Akaike Information Criterion
    pub aic: f64,
    /// Bayesian Information Criterion
    pub bic: f64,
}
79
/// Result of nonparametric functional regression with mixed predictors.
///
/// Produced by [`fregre_np_mixed`]; predictions for new data go through
/// [`predict_fregre_np`].
#[derive(Debug, Clone, PartialEq)]
pub struct FregreNpResult {
    /// Fitted values ŷ (length n)
    pub fitted_values: Vec<f64>,
    /// Residuals y - ŷ (length n)
    pub residuals: Vec<f64>,
    /// R² statistic
    pub r_squared: f64,
    /// Bandwidth for functional distance kernel
    pub h_func: f64,
    /// Bandwidth for scalar covariates kernel
    pub h_scalar: f64,
    /// Leave-one-out CV error
    pub cv_error: f64,
}
96
/// Result of functional logistic regression.
///
/// Produced by [`functional_logistic`] for binary outcomes; score new data
/// with [`FunctionalLogisticResult::predict`].
#[derive(Debug, Clone, PartialEq)]
pub struct FunctionalLogisticResult {
    /// Intercept α
    pub intercept: f64,
    /// Functional coefficient β(t), evaluated on the original grid (length m)
    pub beta_t: Vec<f64>,
    /// Pointwise standard errors of β(t) (length m)
    pub beta_se: Vec<f64>,
    /// Scalar coefficients γ (one per scalar covariate)
    pub gamma: Vec<f64>,
    /// Predicted probabilities P(Y=1) (length n)
    pub probabilities: Vec<f64>,
    /// Predicted class labels (0 or 1)
    pub predicted_classes: Vec<usize>,
    /// Number of FPC components used
    pub ncomp: usize,
    /// Classification accuracy on training data
    pub accuracy: f64,
    /// Standard errors of all coefficients (intercept, FPC scores, scalar covariates)
    pub std_errors: Vec<f64>,
    /// Regression coefficients on (FPC scores, scalar covariates) — internal
    pub coefficients: Vec<f64>,
    /// Log-likelihood at convergence
    pub log_likelihood: f64,
    /// Number of IRLS iterations
    pub iterations: usize,
    /// FPCA result (for projecting new data)
    pub fpca: FpcaResult,
    /// Akaike Information Criterion
    pub aic: f64,
    /// Bayesian Information Criterion
    pub bic: f64,
}
131
/// Result of cross-validation for K selection.
///
/// Produced by [`fregre_cv`]. `k_values` and `cv_errors` are parallel
/// vectors; `optimal_k` is the entry of `k_values` whose error equals
/// `min_cv_error`.
#[derive(Debug, Clone, PartialEq)]
pub struct FregreCvResult {
    /// Candidate K values tested
    pub k_values: Vec<usize>,
    /// CV error for each K
    pub cv_errors: Vec<f64>,
    /// Optimal K (minimizing CV error)
    pub optimal_k: usize,
    /// Minimum CV error
    pub min_cv_error: f64,
}
144
/// Criterion used for model selection.
///
/// For all three criteria, smaller values indicate a preferred model.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum SelectionCriterion {
    /// Akaike Information Criterion
    Aic,
    /// Bayesian Information Criterion
    Bic,
    /// Generalized Cross-Validation
    Gcv,
}
155
/// Result of ncomp model selection.
///
/// See [`model_selection_ncomp`]. Each tuple in `criteria` records the
/// candidate component count together with its AIC, BIC, and GCV scores.
#[derive(Debug, Clone, PartialEq)]
pub struct ModelSelectionResult {
    /// Best number of FPC components by the chosen criterion
    pub best_ncomp: usize,
    /// (ncomp, AIC, BIC, GCV) for each candidate
    pub criteria: Vec<(usize, f64, f64, f64)>,
}
164
/// Result of bootstrap confidence intervals for β(t).
///
/// Produced by [`bootstrap_ci_fregre_lm`] and
/// [`bootstrap_ci_functional_logistic`]. All bound vectors share the grid
/// length m of `center`.
#[derive(Debug, Clone, PartialEq)]
pub struct BootstrapCiResult {
    /// Pointwise lower bound (length m).
    pub lower: Vec<f64>,
    /// Pointwise upper bound (length m).
    pub upper: Vec<f64>,
    /// Original β(t) estimate (length m).
    pub center: Vec<f64>,
    /// Simultaneous lower bound (sup-norm adjusted, length m).
    pub sim_lower: Vec<f64>,
    /// Simultaneous upper bound (sup-norm adjusted, length m).
    pub sim_upper: Vec<f64>,
    /// Number of bootstrap replicates that converged.
    pub n_boot_success: usize,
}
181
/// Result of lambda selection for basis regression via cross-validation.
///
/// Produced by [`fregre_basis_cv`]. `lambda_values`, `cv_errors`, and
/// `cv_se` are parallel vectors.
#[derive(Debug, Clone, PartialEq)]
pub struct FregreBasisCvResult {
    /// Optimal smoothing parameter lambda.
    pub optimal_lambda: f64,
    /// Mean CV error for each lambda.
    pub cv_errors: Vec<f64>,
    /// SE of CV error across folds for each lambda.
    pub cv_se: Vec<f64>,
    /// Lambda values tested.
    pub lambda_values: Vec<f64>,
    /// Minimum mean CV error.
    pub min_cv_error: f64,
}
196
/// Result of bandwidth selection for nonparametric regression via CV.
///
/// Produced by [`fregre_np_cv`]. `h_values`, `cv_errors`, and `cv_se` are
/// parallel vectors.
#[derive(Debug, Clone, PartialEq)]
pub struct FregreNpCvResult {
    /// Optimal bandwidth.
    pub optimal_h: f64,
    /// Mean CV error for each bandwidth.
    pub cv_errors: Vec<f64>,
    /// SE of CV error across folds for each bandwidth.
    pub cv_se: Vec<f64>,
    /// Bandwidth values tested.
    pub h_values: Vec<f64>,
    /// Minimum mean CV error.
    pub min_cv_error: f64,
}
211
212// ---------------------------------------------------------------------------
213// Shared linear algebra helpers
214// ---------------------------------------------------------------------------
215
216/// Compute X'X (symmetric, p×p stored flat row-major).
217pub(crate) fn compute_xtx(x: &FdMatrix) -> Vec<f64> {
218    let (n, p) = x.shape();
219    let mut xtx = vec![0.0; p * p];
220    for k in 0..p {
221        for j in k..p {
222            let mut s = 0.0;
223            for i in 0..n {
224                s += x[(i, k)] * x[(i, j)];
225            }
226            xtx[k * p + j] = s;
227            xtx[j * p + k] = s;
228        }
229    }
230    xtx
231}
232
233/// Compute X'y (length p).
234fn compute_xty(x: &FdMatrix, y: &[f64]) -> Vec<f64> {
235    let (n, p) = x.shape();
236    (0..p)
237        .map(|k| {
238            let mut s = 0.0;
239            for i in 0..n {
240                s += x[(i, k)] * y[i];
241            }
242            s
243        })
244        .collect()
245}
246
247/// Cholesky factorization: A = LL'. Returns L (p×p flat row-major) or error if singular.
248pub(crate) fn cholesky_factor(a: &[f64], p: usize) -> Result<Vec<f64>, FdarError> {
249    let mut l = vec![0.0; p * p];
250    for j in 0..p {
251        let mut diag = a[j * p + j];
252        for k in 0..j {
253            diag -= l[j * p + k] * l[j * p + k];
254        }
255        if diag <= 1e-12 {
256            return Err(FdarError::ComputationFailed {
257                operation: "Cholesky factorization",
258                detail: "matrix is singular or near-singular".into(),
259            });
260        }
261        l[j * p + j] = diag.sqrt();
262        for i in (j + 1)..p {
263            let mut s = a[i * p + j];
264            for k in 0..j {
265                s -= l[i * p + k] * l[j * p + k];
266            }
267            l[i * p + j] = s / l[j * p + j];
268        }
269    }
270    Ok(l)
271}
272
/// Solve Lz = b (forward) then L'x = z (back). L is p×p flat row-major.
///
/// Both passes are performed in place on a single working vector, which is
/// returned as the solution of (LL') x = b.
pub(crate) fn cholesky_forward_back(l: &[f64], b: &[f64], p: usize) -> Vec<f64> {
    let mut sol = b.to_vec();
    // Forward substitution: overwrite sol with z, where L z = b.
    for row in 0..p {
        for col in 0..row {
            sol[row] -= l[row * p + col] * sol[col];
        }
        sol[row] /= l[row * p + row];
    }
    // Back substitution against L': entry (row, col) of L' is l[col * p + row].
    for row in (0..p).rev() {
        for col in (row + 1)..p {
            sol[row] -= l[col * p + row] * sol[col];
        }
        sol[row] /= l[row * p + row];
    }
    sol
}
290
291/// Solve Ax = b via Cholesky decomposition (A must be symmetric positive definite).
292fn cholesky_solve(a: &[f64], b: &[f64], p: usize) -> Result<Vec<f64>, FdarError> {
293    let l = cholesky_factor(a, p)?;
294    Ok(cholesky_forward_back(&l, b, p))
295}
296
297/// Compute hat matrix diagonal: H_ii = x_i' (X'X)^{-1} x_i, given Cholesky factor L of X'X.
298pub(crate) fn compute_hat_diagonal(x: &FdMatrix, l: &[f64]) -> Vec<f64> {
299    let (n, p) = x.shape();
300    let mut hat_diag = vec![0.0; n];
301    for i in 0..n {
302        let mut v = vec![0.0; p];
303        for j in 0..p {
304            v[j] = x[(i, j)];
305            for k in 0..j {
306                v[j] -= l[j * p + k] * v[k];
307            }
308            v[j] /= l[j * p + j];
309        }
310        hat_diag[i] = v.iter().map(|vi| vi * vi).sum();
311    }
312    hat_diag
313}
314
/// Compute diagonal of (X'X)^{-1} given Cholesky factor L, then SE = sqrt(sigma² * diag).
///
/// For coefficient j, solves L w = e_j by forward substitution; then
/// diag_j[(X'X)^{-1}] = ||w||² because (X'X)^{-1} = L'^{-1} L^{-1}.
///
/// `l` is the p×p lower-triangular factor (flat row-major) and `sigma2` the
/// residual variance estimate.
fn compute_ols_std_errors(l: &[f64], p: usize, sigma2: f64) -> Vec<f64> {
    let mut se = vec![0.0; p];
    for j in 0..p {
        let mut v = vec![0.0; p];
        v[j] = 1.0;
        // e_j has a zero prefix and L is lower-triangular, so w[k] = 0 for
        // k < j. Start the substitution at row j and skip the zero terms —
        // same result as solving from row 0, at roughly half the work.
        for k in j..p {
            for kk in j..k {
                v[k] -= l[k * p + kk] * v[kk];
            }
            v[k] /= l[k * p + k];
        }
        se[j] = (sigma2 * v.iter().map(|vi| vi * vi).sum::<f64>()).sqrt();
    }
    se
}
331
332// ---------------------------------------------------------------------------
333// Design matrix and coefficient recovery
334// ---------------------------------------------------------------------------
335
/// Validate inputs for fregre_lm / functional_logistic.
///
/// `n` is the number of curves (rows) and `m` the grid size (columns) of
/// the functional data. Requires n >= 3, m >= 1, `y.len() == n`, and — when
/// scalar covariates are supplied — one covariate row per curve.
fn validate_fregre_inputs(
    n: usize,
    m: usize,
    y: &[f64],
    scalar_covariates: Option<&FdMatrix>,
) -> Result<(), FdarError> {
    if n < 3 {
        return Err(FdarError::InvalidDimension {
            parameter: "data",
            expected: "at least 3 rows".to_string(),
            actual: format!("{n}"),
        });
    }
    if m == 0 {
        return Err(FdarError::InvalidDimension {
            parameter: "data",
            expected: "at least 1 column".to_string(),
            actual: "0".to_string(),
        });
    }
    if y.len() != n {
        return Err(FdarError::InvalidDimension {
            parameter: "y",
            expected: format!("{n}"),
            actual: format!("{}", y.len()),
        });
    }
    if let Some(sc) = scalar_covariates {
        if sc.nrows() != n {
            return Err(FdarError::InvalidDimension {
                parameter: "scalar_covariates",
                expected: format!("{n} rows"),
                actual: format!("{} rows", sc.nrows()),
            });
        }
    }
    Ok(())
}
376
377/// Resolve ncomp: auto-select via CV if 0, otherwise clamp.
378fn resolve_ncomp(
379    ncomp: usize,
380    data: &FdMatrix,
381    y: &[f64],
382    scalar_covariates: Option<&FdMatrix>,
383    n: usize,
384    m: usize,
385) -> Result<usize, FdarError> {
386    if ncomp == 0 {
387        let cv = fregre_cv(data, y, scalar_covariates, 1, m.min(n - 1).min(20), 5)?;
388        Ok(cv.optimal_k)
389    } else {
390        Ok(ncomp.min(n - 1).min(m))
391    }
392}
393
394pub(crate) fn build_design_matrix(
395    fpca_scores: &FdMatrix,
396    ncomp: usize,
397    scalar_covariates: Option<&FdMatrix>,
398    n: usize,
399) -> FdMatrix {
400    let p_scalar = scalar_covariates.map_or(0, super::matrix::FdMatrix::ncols);
401    let p_total = 1 + ncomp + p_scalar;
402    let mut design = FdMatrix::zeros(n, p_total);
403    for i in 0..n {
404        design[(i, 0)] = 1.0;
405        for k in 0..ncomp {
406            design[(i, 1 + k)] = fpca_scores[(i, k)];
407        }
408        if let Some(sc) = scalar_covariates {
409            for j in 0..p_scalar {
410                design[(i, 1 + ncomp + j)] = sc[(i, j)];
411            }
412        }
413    }
414    design
415}
416
417/// Recover functional coefficient β(t) = Σ_k γ_k φ_k(t).
418fn recover_beta_t(fpc_coeffs: &[f64], rotation: &FdMatrix, m: usize) -> Vec<f64> {
419    let ncomp = fpc_coeffs.len();
420    let mut beta_t = vec![0.0; m];
421    for k in 0..ncomp {
422        for j in 0..m {
423            beta_t[j] += fpc_coeffs[k] * rotation[(j, k)];
424        }
425    }
426    beta_t
427}
428
429/// Pointwise standard error of β(t) via error propagation through FPCA rotation.
430///
431/// SE[β(t_j)]² = Σ_k φ_k(t_j)² · SE[γ_k]²
432fn compute_beta_se(gamma_se: &[f64], rotation: &FdMatrix, m: usize) -> Vec<f64> {
433    let ncomp = gamma_se.len();
434    let mut beta_se = vec![0.0; m];
435    for j in 0..m {
436        let mut var_j = 0.0;
437        for k in 0..ncomp {
438            var_j += rotation[(j, k)].powi(2) * gamma_se[k].powi(2);
439        }
440        beta_se[j] = var_j.sqrt();
441    }
442    beta_se
443}
444
445/// Compute fitted values ŷ = X β.
446fn compute_fitted(design: &FdMatrix, coeffs: &[f64]) -> Vec<f64> {
447    let (n, p) = design.shape();
448    (0..n)
449        .map(|i| {
450            let mut yhat = 0.0;
451            for j in 0..p {
452                yhat += design[(i, j)] * coeffs[j];
453            }
454            yhat
455        })
456        .collect()
457}
458
/// Compute R² and adjusted R².
///
/// `p_total` is the total number of regression coefficients including the
/// intercept, so the model degrees of freedom are `p_total - 1`.
///
/// Returns `(r_squared, r_squared_adj)`. When the total sum of squares is
/// zero (constant y), R² is defined as 0; when there are too few
/// observations for the adjustment (n - df_model - 1 <= 0), the unadjusted
/// R² is returned for both.
fn compute_r_squared(y: &[f64], residuals: &[f64], p_total: usize) -> (f64, f64) {
    let n = y.len() as f64;
    let y_mean = y.iter().sum::<f64>() / n;
    let ss_tot: f64 = y.iter().map(|&yi| (yi - y_mean).powi(2)).sum();
    let ss_res: f64 = residuals.iter().map(|r| r * r).sum();
    let r_squared = if ss_tot > 0.0 {
        1.0 - ss_res / ss_tot
    } else {
        0.0
    };
    // saturating_sub guards against usize underflow (panic in debug builds)
    // if a caller ever passes p_total == 0.
    let df_model = p_total.saturating_sub(1) as f64;
    let df_resid = n - df_model - 1.0;
    let r_squared_adj = if df_resid > 0.0 {
        1.0 - (1.0 - r_squared) * (n - 1.0) / df_resid
    } else {
        r_squared
    };
    (r_squared, r_squared_adj)
}
478
479// ---------------------------------------------------------------------------
480// OLS solver
481// ---------------------------------------------------------------------------
482
483/// Solve ordinary least squares: min ||Xb - y||² via normal equations with Cholesky.
484/// Returns (coefficients, hat_matrix_diagonal) or error if singular.
485fn ols_solve(x: &FdMatrix, y: &[f64]) -> Result<(Vec<f64>, Vec<f64>), FdarError> {
486    let (n, p) = x.shape();
487    if n < p || p == 0 {
488        return Err(FdarError::InvalidDimension {
489            parameter: "design matrix",
490            expected: format!("n >= p and p > 0 (p={p})"),
491            actual: format!("n={n}, p={p}"),
492        });
493    }
494    let xtx = compute_xtx(x);
495    let xty = compute_xty(x, y);
496    let l = cholesky_factor(&xtx, p)?;
497    let b = cholesky_forward_back(&l, &xty, p);
498    let hat_diag = compute_hat_diagonal(x, &l);
499    Ok((b, hat_diag))
500}
501
/// Sigmoid function: 1 / (1 + exp(-x))
///
/// Branches on the sign of `x` so `exp` is only ever evaluated on a
/// non-positive argument, which avoids overflow for large |x|.
pub(crate) fn sigmoid(x: f64) -> f64 {
    if x < 0.0 {
        let ex = x.exp();
        ex / (1.0 + ex)
    } else {
        1.0 / (1.0 + (-x).exp())
    }
}
511
512// ---------------------------------------------------------------------------
513// Predict methods on result structs
514// ---------------------------------------------------------------------------
515
impl FregreLmResult {
    /// Predict new responses. Delegates to [`predict_fregre_lm`].
    ///
    /// `new_data` holds new functional observations (presumably on the same
    /// grid as the training data — see [`predict_fregre_lm`]); `new_scalar`
    /// supplies matching scalar covariates when the model was fit with any.
    pub fn predict(&self, new_data: &FdMatrix, new_scalar: Option<&FdMatrix>) -> Vec<f64> {
        predict_fregre_lm(self, new_data, new_scalar)
    }
}
522
impl FunctionalLogisticResult {
    /// Predict P(Y=1) for new data. Delegates to [`predict_functional_logistic`].
    ///
    /// `new_data` holds new functional observations (presumably on the same
    /// grid as the training data — see [`predict_functional_logistic`]);
    /// `new_scalar` supplies matching scalar covariates when the model was
    /// fit with any.
    pub fn predict(&self, new_data: &FdMatrix, new_scalar: Option<&FdMatrix>) -> Vec<f64> {
        predict_functional_logistic(self, new_data, new_scalar)
    }
}