Skip to main content

fdars_core/scalar_on_function/
mod.rs

1//! Scalar-on-function regression with mixed scalar/functional covariates.
2//!
3//! Implements models of the form:
4//! ```text
5//! y = α + ∫β(t)X(t)dt + γᵀz + ε
6//! ```
7//! where X(t) is a functional predictor, z is a vector of scalar covariates,
8//! β(t) is the functional coefficient, and γ is the vector of scalar coefficients.
9//!
10//! # Methods
11//!
12//! - [`fregre_lm`]: FPC-based functional linear model with optional scalar covariates
13//! - [`fregre_l1`]: L1 (median) robust functional regression via IRLS
14//! - [`fregre_huber`]: Huber M-estimation robust functional regression via IRLS
15//! - [`fregre_np_mixed`]: Nonparametric kernel regression with product kernels
16//! - [`functional_logistic`]: Logistic regression for binary outcomes
17//! - [`fregre_cv`]: Cross-validation for number of FPC components
18
19use crate::error::FdarError;
20use crate::linalg::cholesky_solve as linalg_cholesky_solve;
21use crate::matrix::FdMatrix;
22use crate::regression::{FpcaResult, PlsResult};
23
24mod bootstrap;
25mod cv;
26mod fregre_lm;
27mod logistic;
28mod nonparametric;
29mod pls;
30mod robust;
31#[cfg(test)]
32mod tests;
33
34// Re-export all public items from submodules
35pub use bootstrap::{bootstrap_ci_fregre_lm, bootstrap_ci_functional_logistic};
36pub use cv::{fregre_basis_cv, fregre_np_cv};
37pub use fregre_lm::{fregre_cv, fregre_lm, model_selection_ncomp, predict_fregre_lm};
38pub use logistic::{functional_logistic, predict_functional_logistic};
39pub use nonparametric::{
40    fregre_np_from_distances, fregre_np_mixed, predict_fregre_np, predict_fregre_np_from_distances,
41};
42pub use pls::{fregre_pls, predict_fregre_pls};
43pub use robust::{fregre_huber, fregre_l1, predict_fregre_robust};
44
45// ---------------------------------------------------------------------------
46// Result types
47// ---------------------------------------------------------------------------
48
/// Result of FPC-based functional linear regression.
///
/// Holds the recovered functional coefficient, scalar coefficients, fit
/// diagnostics, and the [`FpcaResult`] needed to project new curves.
/// Use [`FregreLmResult::predict`] to score new observations.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct FregreLmResult {
    /// Intercept α
    pub intercept: f64,
    /// Functional coefficient β(t), evaluated on the original grid (length m)
    pub beta_t: Vec<f64>,
    /// Pointwise standard errors of β(t) (length m)
    pub beta_se: Vec<f64>,
    /// Scalar coefficients γ (one per scalar covariate)
    pub gamma: Vec<f64>,
    /// Fitted values ŷ (length n)
    pub fitted_values: Vec<f64>,
    /// Residuals y - ŷ (length n)
    pub residuals: Vec<f64>,
    /// R² statistic
    pub r_squared: f64,
    /// Adjusted R² (penalized for the number of model coefficients)
    pub r_squared_adj: f64,
    /// Standard errors of all coefficients (intercept, FPC scores, scalar covariates)
    pub std_errors: Vec<f64>,
    /// Number of FPC components used
    pub ncomp: usize,
    /// FPCA result (for projecting new data)
    pub fpca: FpcaResult,
    /// Regression coefficients on (FPC scores, scalar covariates) — internal
    pub coefficients: Vec<f64>,
    /// Residual standard error
    pub residual_se: f64,
    /// GCV criterion value (if computed)
    pub gcv: f64,
    /// Akaike Information Criterion
    pub aic: f64,
    /// Bayesian Information Criterion
    pub bic: f64,
}
86
/// Result of nonparametric kernel regression with mixed functional and
/// scalar predictors.
///
/// Stores the fit, the two bandwidths used for the product kernel, and the
/// leave-one-out CV error of the fitted bandwidth pair.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct FregreNpResult {
    /// Fitted values ŷ (length n)
    pub fitted_values: Vec<f64>,
    /// Residuals y - ŷ (length n)
    pub residuals: Vec<f64>,
    /// R² statistic
    pub r_squared: f64,
    /// Bandwidth for functional distance kernel
    pub h_func: f64,
    /// Bandwidth for scalar covariates kernel
    pub h_scalar: f64,
    /// Leave-one-out CV error
    pub cv_error: f64,
}
104
/// Result of robust (L1 or Huber) functional regression fit by IRLS.
///
/// Includes IRLS convergence diagnostics and the final observation weights.
/// Use [`FregreRobustResult::predict`] to score new observations.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct FregreRobustResult {
    /// Intercept
    pub intercept: f64,
    /// Functional coefficient β(t), evaluated on the original grid (length m)
    pub beta_t: Vec<f64>,
    /// Fitted values ŷ (length n)
    pub fitted_values: Vec<f64>,
    /// Residuals y - ŷ (length n)
    pub residuals: Vec<f64>,
    /// Regression coefficients (intercept, FPC scores, scalar covariates)
    pub coefficients: Vec<f64>,
    /// Number of FPC components used
    pub ncomp: usize,
    /// FPCA result (for projecting new data)
    pub fpca: FpcaResult,
    /// Number of IRLS iterations performed
    pub iterations: usize,
    /// Whether the IRLS algorithm converged
    pub converged: bool,
    /// Final IRLS weights (length n)
    pub weights: Vec<f64>,
    /// R² statistic
    pub r_squared: f64,
}
132
/// Result of functional logistic regression for binary outcomes.
///
/// Use [`FunctionalLogisticResult::predict`] to obtain P(Y=1) for new curves.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct FunctionalLogisticResult {
    /// Intercept α
    pub intercept: f64,
    /// Functional coefficient β(t), evaluated on the original grid (length m)
    pub beta_t: Vec<f64>,
    /// Pointwise standard errors of β(t) (length m)
    pub beta_se: Vec<f64>,
    /// Scalar coefficients γ (one per scalar covariate)
    pub gamma: Vec<f64>,
    /// Predicted probabilities P(Y=1) (length n)
    pub probabilities: Vec<f64>,
    /// Predicted class labels (0 or 1)
    pub predicted_classes: Vec<usize>,
    /// Number of FPC components used
    pub ncomp: usize,
    /// Classification accuracy on training data
    pub accuracy: f64,
    /// Standard errors of all coefficients (intercept, FPC scores, scalar covariates)
    pub std_errors: Vec<f64>,
    /// Regression coefficients on (FPC scores, scalar covariates) — internal
    pub coefficients: Vec<f64>,
    /// Log-likelihood at convergence
    pub log_likelihood: f64,
    /// Number of IRLS iterations
    pub iterations: usize,
    /// FPCA result (for projecting new data)
    pub fpca: FpcaResult,
    /// Akaike Information Criterion
    pub aic: f64,
    /// Bayesian Information Criterion
    pub bic: f64,
}
168
/// Result of cross-validation for selecting the number of FPC components K.
///
/// `k_values` and `cv_errors` are parallel vectors; `optimal_k` is the entry
/// of `k_values` with the smallest CV error.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct FregreCvResult {
    /// Candidate K values tested
    pub k_values: Vec<usize>,
    /// CV error for each K (parallel to `k_values`)
    pub cv_errors: Vec<f64>,
    /// Optimal K (minimizing CV error)
    pub optimal_k: usize,
    /// Minimum CV error
    pub min_cv_error: f64,
    /// Out-of-fold predictions at optimal K (length n, each predicted when held out)
    pub oof_predictions: Vec<f64>,
    /// Fold assignment for each observation (0..n_folds)
    pub fold_assignments: Vec<usize>,
    /// Per-fold MSE at optimal K
    pub fold_errors: Vec<f64>,
}
188
/// Result of PLS-based scalar-on-function regression.
///
/// Mirrors [`FregreLmResult`] but projects onto partial-least-squares
/// components instead of FPCs; the stored [`PlsResult`] projects new data.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct PlsRegressionResult {
    /// Intercept α
    pub intercept: f64,
    /// Functional coefficient β(t), evaluated on the original grid (length m)
    pub beta_t: Vec<f64>,
    /// Scalar coefficients γ (one per scalar covariate)
    pub gamma: Vec<f64>,
    /// Fitted values ŷ (length n)
    pub fitted_values: Vec<f64>,
    /// Residuals y - ŷ (length n)
    pub residuals: Vec<f64>,
    /// R² statistic
    pub r_squared: f64,
    /// Adjusted R²
    pub r_squared_adj: f64,
    /// Number of PLS components used
    pub ncomp: usize,
    /// PLS result (for projecting new data)
    pub pls: PlsResult,
    /// Regression coefficients on (intercept, PLS scores, scalar covariates)
    pub coefficients: Vec<f64>,
    /// Residual standard error
    pub residual_se: f64,
    /// Akaike Information Criterion
    pub aic: f64,
    /// Bayesian Information Criterion
    pub bic: f64,
}
220
/// Criterion used for model selection.
///
/// Derives `Eq` and `Hash` in addition to `PartialEq` so the criterion can be
/// compared exactly and used as a map/set key; it is `Copy` as a plain tag.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SelectionCriterion {
    /// Akaike Information Criterion
    Aic,
    /// Bayesian Information Criterion
    Bic,
    /// Generalized Cross-Validation
    Gcv,
}
231
/// Result of ncomp model selection.
///
/// `criteria` records every candidate alongside its information criteria so
/// callers can inspect the full selection path, not just the winner.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct ModelSelectionResult {
    /// Best number of FPC components by the chosen criterion
    pub best_ncomp: usize,
    /// (ncomp, AIC, BIC, GCV) for each candidate
    pub criteria: Vec<(usize, f64, f64, f64)>,
}
241
/// Result of bootstrap confidence intervals for β(t).
///
/// Pointwise bands (`lower`/`upper`) bound each grid point individually;
/// simultaneous bands (`sim_lower`/`sim_upper`) are sup-norm adjusted to
/// cover the whole curve jointly.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct BootstrapCiResult {
    /// Pointwise lower bound (length m).
    pub lower: Vec<f64>,
    /// Pointwise upper bound (length m).
    pub upper: Vec<f64>,
    /// Original β(t) estimate (length m).
    pub center: Vec<f64>,
    /// Simultaneous lower bound (sup-norm adjusted, length m).
    pub sim_lower: Vec<f64>,
    /// Simultaneous upper bound (sup-norm adjusted, length m).
    pub sim_upper: Vec<f64>,
    /// Number of bootstrap replicates that converged.
    pub n_boot_success: usize,
}
259
/// Result of lambda selection for basis regression via cross-validation.
///
/// `lambda_values`, `cv_errors`, and `cv_se` are parallel vectors over the
/// candidate smoothing parameters.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct FregreBasisCvResult {
    /// Optimal smoothing parameter lambda.
    pub optimal_lambda: f64,
    /// Mean CV error for each lambda (parallel to `lambda_values`).
    pub cv_errors: Vec<f64>,
    /// SE of CV error across folds for each lambda.
    pub cv_se: Vec<f64>,
    /// Lambda values tested.
    pub lambda_values: Vec<f64>,
    /// Minimum mean CV error.
    pub min_cv_error: f64,
}
275
/// Result of bandwidth selection for nonparametric regression via CV.
///
/// `h_values`, `cv_errors`, and `cv_se` are parallel vectors over the
/// candidate bandwidths.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct FregreNpCvResult {
    /// Optimal bandwidth.
    pub optimal_h: f64,
    /// Mean CV error for each bandwidth (parallel to `h_values`).
    pub cv_errors: Vec<f64>,
    /// SE of CV error across folds for each bandwidth.
    pub cv_se: Vec<f64>,
    /// Bandwidth values tested.
    pub h_values: Vec<f64>,
    /// Minimum mean CV error.
    pub min_cv_error: f64,
}
291
292// ---------------------------------------------------------------------------
293// Shared linear algebra helpers (delegated to crate::linalg)
294// ---------------------------------------------------------------------------
295
296// Re-export for use by submodules and explain/ modules that import from
297// `crate::scalar_on_function::{cholesky_factor, cholesky_forward_back, compute_xtx}`.
298pub(crate) use crate::linalg::cholesky_factor;
299pub(crate) use crate::linalg::cholesky_forward_back;
300pub(crate) use crate::linalg::compute_xtx;
301
302/// Compute X'y (length p).
303fn compute_xty(x: &FdMatrix, y: &[f64]) -> Vec<f64> {
304    let (n, p) = x.shape();
305    (0..p)
306        .map(|k| {
307            let mut s = 0.0;
308            for i in 0..n {
309                s += x[(i, k)] * y[i];
310            }
311            s
312        })
313        .collect()
314}
315
/// Solve Ax = b via Cholesky decomposition (A must be symmetric positive definite).
///
/// Thin wrapper over [`crate::linalg::cholesky_solve`] so submodules can call
/// it through `super::cholesky_solve`. `a` is a p×p matrix in row-major
/// order, `b` has length p.
pub(super) fn cholesky_solve(a: &[f64], b: &[f64], p: usize) -> Result<Vec<f64>, FdarError> {
    linalg_cholesky_solve(a, b, p)
}
320
321/// Compute hat matrix diagonal: H_ii = x_i' (X'X)^{-1} x_i, given Cholesky factor L of X'X.
322pub(crate) fn compute_hat_diagonal(x: &FdMatrix, l: &[f64]) -> Vec<f64> {
323    let (n, p) = x.shape();
324    let mut hat_diag = vec![0.0; n];
325    for i in 0..n {
326        let mut v = vec![0.0; p];
327        for j in 0..p {
328            v[j] = x[(i, j)];
329            for k in 0..j {
330                v[j] -= l[j * p + k] * v[k];
331            }
332            v[j] /= l[j * p + j];
333        }
334        hat_diag[i] = v.iter().map(|vi| vi * vi).sum();
335    }
336    hat_diag
337}
338
/// Compute diagonal of (X'X)^{-1} given Cholesky factor L, then SE = sqrt(sigma² * diag).
///
/// `l` is the lower-triangular factor in row-major order (p×p), `sigma2` the
/// residual variance. Since (X'X)^{-1} = L^{-T} L^{-1}, the j-th diagonal
/// entry equals ||L^{-1} e_j||².
fn compute_ols_std_errors(l: &[f64], p: usize, sigma2: f64) -> Vec<f64> {
    let mut se = vec![0.0; p];
    for j in 0..p {
        // Forward-substitute L v = e_j. Rows 0..j of the solution are zero
        // (e_j is zero there), so substitution can start at row j — this
        // skips the dead iterations the naive 0..p loop would perform.
        let mut v = vec![0.0; p];
        v[j] = 1.0;
        for k in j..p {
            for kk in j..k {
                v[k] -= l[k * p + kk] * v[kk];
            }
            v[k] /= l[k * p + k];
        }
        se[j] = (sigma2 * v[j..].iter().map(|vi| vi * vi).sum::<f64>()).sqrt();
    }
    se
}
355
356// ---------------------------------------------------------------------------
357// Design matrix and coefficient recovery
358// ---------------------------------------------------------------------------
359
360/// Build design matrix: \[1, ξ_1, ..., ξ_K, z_1, ..., z_p\].
361/// Validate inputs for fregre_lm / functional_logistic.
362fn validate_fregre_inputs(
363    n: usize,
364    m: usize,
365    y: &[f64],
366    scalar_covariates: Option<&FdMatrix>,
367) -> Result<(), FdarError> {
368    if n < 3 {
369        return Err(FdarError::InvalidDimension {
370            parameter: "data",
371            expected: "at least 3 rows".to_string(),
372            actual: format!("{n}"),
373        });
374    }
375    if m == 0 {
376        return Err(FdarError::InvalidDimension {
377            parameter: "data",
378            expected: "at least 1 column".to_string(),
379            actual: "0".to_string(),
380        });
381    }
382    if y.len() != n {
383        return Err(FdarError::InvalidDimension {
384            parameter: "y",
385            expected: format!("{n}"),
386            actual: format!("{}", y.len()),
387        });
388    }
389    if let Some(sc) = scalar_covariates {
390        if sc.nrows() != n {
391            return Err(FdarError::InvalidDimension {
392                parameter: "scalar_covariates",
393                expected: format!("{n} rows"),
394                actual: format!("{} rows", sc.nrows()),
395            });
396        }
397    }
398    Ok(())
399}
400
401/// Resolve ncomp: auto-select via CV if 0, otherwise clamp.
402fn resolve_ncomp(
403    ncomp: usize,
404    data: &FdMatrix,
405    y: &[f64],
406    scalar_covariates: Option<&FdMatrix>,
407    n: usize,
408    m: usize,
409) -> Result<usize, FdarError> {
410    if ncomp == 0 {
411        let cv = fregre_cv(data, y, scalar_covariates, 1, m.min(n - 1).min(20), 5)?;
412        Ok(cv.optimal_k)
413    } else {
414        Ok(ncomp.min(n - 1).min(m))
415    }
416}
417
418pub(crate) fn build_design_matrix(
419    fpca_scores: &FdMatrix,
420    ncomp: usize,
421    scalar_covariates: Option<&FdMatrix>,
422    n: usize,
423) -> FdMatrix {
424    let p_scalar = scalar_covariates.map_or(0, super::matrix::FdMatrix::ncols);
425    let p_total = 1 + ncomp + p_scalar;
426    let mut design = FdMatrix::zeros(n, p_total);
427    for i in 0..n {
428        design[(i, 0)] = 1.0;
429        for k in 0..ncomp {
430            design[(i, 1 + k)] = fpca_scores[(i, k)];
431        }
432        if let Some(sc) = scalar_covariates {
433            for j in 0..p_scalar {
434                design[(i, 1 + ncomp + j)] = sc[(i, j)];
435            }
436        }
437    }
438    design
439}
440
441/// Recover functional coefficient β(t) = Σ_k γ_k φ_k(t).
442fn recover_beta_t(fpc_coeffs: &[f64], rotation: &FdMatrix, m: usize) -> Vec<f64> {
443    let ncomp = fpc_coeffs.len();
444    let mut beta_t = vec![0.0; m];
445    for k in 0..ncomp {
446        for j in 0..m {
447            beta_t[j] += fpc_coeffs[k] * rotation[(j, k)];
448        }
449    }
450    beta_t
451}
452
453/// Pointwise standard error of β(t) via error propagation through FPCA rotation.
454///
455/// SE[β(t_j)]² = Σ_k φ_k(t_j)² · SE[γ_k]²
456fn compute_beta_se(gamma_se: &[f64], rotation: &FdMatrix, m: usize) -> Vec<f64> {
457    let ncomp = gamma_se.len();
458    let mut beta_se = vec![0.0; m];
459    for j in 0..m {
460        let mut var_j = 0.0;
461        for k in 0..ncomp {
462            var_j += rotation[(j, k)].powi(2) * gamma_se[k].powi(2);
463        }
464        beta_se[j] = var_j.sqrt();
465    }
466    beta_se
467}
468
469/// Compute fitted values ŷ = X β.
470fn compute_fitted(design: &FdMatrix, coeffs: &[f64]) -> Vec<f64> {
471    let (n, p) = design.shape();
472    (0..n)
473        .map(|i| {
474            let mut yhat = 0.0;
475            for j in 0..p {
476                yhat += design[(i, j)] * coeffs[j];
477            }
478            yhat
479        })
480        .collect()
481}
482
/// Compute R² and adjusted R².
///
/// `p_total` is the total number of regression coefficients including the
/// intercept; the model degrees of freedom are `p_total - 1`. Returns
/// `(r_squared, r_squared_adj)`. When the responses are constant (zero total
/// sum of squares) R² is defined as 0; when there are no residual degrees of
/// freedom left, adjusted R² falls back to R².
fn compute_r_squared(y: &[f64], residuals: &[f64], p_total: usize) -> (f64, f64) {
    let n = y.len();
    let y_mean = y.iter().sum::<f64>() / n as f64;
    let ss_tot: f64 = y.iter().map(|&yi| (yi - y_mean).powi(2)).sum();
    let ss_res: f64 = residuals.iter().map(|r| r * r).sum();
    let r_squared = if ss_tot > 0.0 {
        1.0 - ss_res / ss_tot
    } else {
        0.0
    };
    // saturating_sub guards against usize underflow (panic in debug builds)
    // for a degenerate p_total == 0.
    let df_model = p_total.saturating_sub(1) as f64;
    let r_squared_adj = if n as f64 - df_model - 1.0 > 0.0 {
        1.0 - (1.0 - r_squared) * (n as f64 - 1.0) / (n as f64 - df_model - 1.0)
    } else {
        r_squared
    };
    (r_squared, r_squared_adj)
}
502
503// ---------------------------------------------------------------------------
504// OLS solver
505// ---------------------------------------------------------------------------
506
507/// Solve ordinary least squares: min ||Xb - y||² via normal equations with Cholesky.
508/// Returns (coefficients, hat_matrix_diagonal) or error if singular.
509fn ols_solve(x: &FdMatrix, y: &[f64]) -> Result<(Vec<f64>, Vec<f64>), FdarError> {
510    let (n, p) = x.shape();
511    if n < p || p == 0 {
512        return Err(FdarError::InvalidDimension {
513            parameter: "design matrix",
514            expected: format!("n >= p and p > 0 (p={p})"),
515            actual: format!("n={n}, p={p}"),
516        });
517    }
518    let xtx = compute_xtx(x);
519    let xty = compute_xty(x, y);
520    let l = cholesky_factor(&xtx, p)?;
521    let b = cholesky_forward_back(&l, &xty, p);
522    let hat_diag = compute_hat_diagonal(x, &l);
523    Ok((b, hat_diag))
524}
525
/// Sigmoid function: 1 / (1 + exp(-x))
///
/// Evaluated so that `exp` is only ever called on a non-positive argument,
/// which avoids overflow for large |x|.
pub(crate) fn sigmoid(x: f64) -> f64 {
    if x < 0.0 {
        let e = x.exp();
        e / (1.0 + e)
    } else {
        1.0 / (1.0 + (-x).exp())
    }
}
535
536// ---------------------------------------------------------------------------
537// Predict methods on result structs
538// ---------------------------------------------------------------------------
539
impl FregreLmResult {
    /// Predict new responses. Delegates to [`predict_fregre_lm`], using the
    /// stored FPCA result and coefficients; `new_scalar` must be provided
    /// iff the model was fit with scalar covariates.
    pub fn predict(&self, new_data: &FdMatrix, new_scalar: Option<&FdMatrix>) -> Vec<f64> {
        predict_fregre_lm(self, new_data, new_scalar)
    }
}
546
impl FregreRobustResult {
    /// Predict new responses. Delegates to [`predict_fregre_robust`], using
    /// the stored FPCA result and robust coefficients.
    pub fn predict(&self, new_data: &FdMatrix, new_scalar: Option<&FdMatrix>) -> Vec<f64> {
        predict_fregre_robust(self, new_data, new_scalar)
    }
}
553
impl FunctionalLogisticResult {
    /// Predict P(Y=1) for new data. Delegates to
    /// [`predict_functional_logistic`], using the stored FPCA result and
    /// logistic coefficients.
    pub fn predict(&self, new_data: &FdMatrix, new_scalar: Option<&FdMatrix>) -> Vec<f64> {
        predict_functional_logistic(self, new_data, new_scalar)
    }
}