Skip to main content

fdars_core/scalar_on_function/
mod.rs

1//! Scalar-on-function regression with mixed scalar/functional covariates.
2//!
3//! Implements models of the form:
4//! ```text
5//! y = α + ∫β(t)X(t)dt + γᵀz + ε
6//! ```
7//! where X(t) is a functional predictor, z is a vector of scalar covariates,
8//! β(t) is the functional coefficient, and γ is the vector of scalar coefficients.
9//!
10//! # Methods
11//!
12//! - [`fregre_lm`]: FPC-based functional linear model with optional scalar covariates
13//! - [`fregre_l1`]: L1 (median) robust functional regression via IRLS
14//! - [`fregre_huber`]: Huber M-estimation robust functional regression via IRLS
15//! - [`fregre_np_mixed`]: Nonparametric kernel regression with product kernels
16//! - [`functional_logistic`]: Logistic regression for binary outcomes
17//! - [`fregre_cv`]: Cross-validation for number of FPC components
18
19use crate::error::FdarError;
20use crate::matrix::FdMatrix;
21use crate::regression::FpcaResult;
22
23mod bootstrap;
24mod cv;
25mod fregre_lm;
26mod logistic;
27mod nonparametric;
28mod robust;
29#[cfg(test)]
30mod tests;
31
32// Re-export all public items from submodules
33pub use bootstrap::{bootstrap_ci_fregre_lm, bootstrap_ci_functional_logistic};
34pub use cv::{fregre_basis_cv, fregre_np_cv};
35pub use fregre_lm::{fregre_cv, fregre_lm, model_selection_ncomp, predict_fregre_lm};
36pub use logistic::{functional_logistic, predict_functional_logistic};
37pub use nonparametric::{fregre_np_mixed, predict_fregre_np};
38pub use robust::{fregre_huber, fregre_l1, predict_fregre_robust};
39
40// ---------------------------------------------------------------------------
41// Result types
42// ---------------------------------------------------------------------------
43
/// Result of functional linear regression (see [`fregre_lm`]).
///
/// Carries the fitted model plus everything needed to predict on new data
/// (the stored [`FpcaResult`] and raw coefficient vector).
#[derive(Debug, Clone, PartialEq)]
pub struct FregreLmResult {
    /// Intercept α
    pub intercept: f64,
    /// Functional coefficient β(t), evaluated on the original grid (length m)
    pub beta_t: Vec<f64>,
    /// Pointwise standard errors of β(t) (length m)
    pub beta_se: Vec<f64>,
    /// Scalar coefficients γ (one per scalar covariate)
    pub gamma: Vec<f64>,
    /// Fitted values ŷ (length n)
    pub fitted_values: Vec<f64>,
    /// Residuals y - ŷ (length n)
    pub residuals: Vec<f64>,
    /// R² statistic
    pub r_squared: f64,
    /// Adjusted R² (penalized for the number of model parameters)
    pub r_squared_adj: f64,
    /// Standard errors of all coefficients, in design-matrix order
    /// (intercept, FPC scores, scalar covariates)
    pub std_errors: Vec<f64>,
    /// Number of FPC components used
    pub ncomp: usize,
    /// FPCA result (for projecting new data)
    pub fpca: FpcaResult,
    /// Regression coefficients on (FPC scores, scalar covariates) — internal
    pub coefficients: Vec<f64>,
    /// Residual standard error
    pub residual_se: f64,
    /// GCV criterion value (if computed)
    pub gcv: f64,
    /// Akaike Information Criterion
    pub aic: f64,
    /// Bayesian Information Criterion
    pub bic: f64,
}
80
/// Result of nonparametric functional regression with mixed predictors
/// (see [`fregre_np_mixed`]).
#[derive(Debug, Clone, PartialEq)]
pub struct FregreNpResult {
    /// Fitted values ŷ (length n)
    pub fitted_values: Vec<f64>,
    /// Residuals y - ŷ (length n)
    pub residuals: Vec<f64>,
    /// R² statistic
    pub r_squared: f64,
    /// Bandwidth for functional distance kernel
    pub h_func: f64,
    /// Bandwidth for scalar covariates kernel
    pub h_scalar: f64,
    /// Leave-one-out CV error
    pub cv_error: f64,
}
97
/// Result of robust (L1 or Huber) functional regression
/// (see [`fregre_l1`] and [`fregre_huber`]).
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct FregreRobustResult {
    /// Intercept
    pub intercept: f64,
    /// Functional coefficient β(t), evaluated on the original grid (length m)
    pub beta_t: Vec<f64>,
    /// Fitted values ŷ (length n)
    pub fitted_values: Vec<f64>,
    /// Residuals y - ŷ (length n)
    pub residuals: Vec<f64>,
    /// Regression coefficients, in design-matrix order
    /// (intercept, FPC scores, scalar covariates)
    pub coefficients: Vec<f64>,
    /// Number of FPC components used
    pub ncomp: usize,
    /// FPCA result (for projecting new data)
    pub fpca: FpcaResult,
    /// Number of IRLS iterations performed
    pub iterations: usize,
    /// Whether the IRLS algorithm converged
    pub converged: bool,
    /// Final IRLS weights (length n); downweighted observations indicate outliers
    pub weights: Vec<f64>,
    /// R² statistic
    pub r_squared: f64,
}
125
/// Result of functional logistic regression for binary outcomes
/// (see [`functional_logistic`]).
#[derive(Debug, Clone, PartialEq)]
pub struct FunctionalLogisticResult {
    /// Intercept α
    pub intercept: f64,
    /// Functional coefficient β(t), evaluated on the original grid (length m)
    pub beta_t: Vec<f64>,
    /// Pointwise standard errors of β(t) (length m)
    pub beta_se: Vec<f64>,
    /// Scalar coefficients γ (one per scalar covariate)
    pub gamma: Vec<f64>,
    /// Predicted probabilities P(Y=1) (length n)
    pub probabilities: Vec<f64>,
    /// Predicted class labels (0 or 1)
    pub predicted_classes: Vec<usize>,
    /// Number of FPC components used
    pub ncomp: usize,
    /// Classification accuracy on training data
    pub accuracy: f64,
    /// Standard errors of all coefficients, in design-matrix order
    /// (intercept, FPC scores, scalar covariates)
    pub std_errors: Vec<f64>,
    /// Regression coefficients on (FPC scores, scalar covariates) — internal
    pub coefficients: Vec<f64>,
    /// Log-likelihood at convergence
    pub log_likelihood: f64,
    /// Number of IRLS iterations
    pub iterations: usize,
    /// FPCA result (for projecting new data)
    pub fpca: FpcaResult,
    /// Akaike Information Criterion
    pub aic: f64,
    /// Bayesian Information Criterion
    pub bic: f64,
}
160
/// Result of cross-validation for K selection (see [`fregre_cv`]).
#[derive(Debug, Clone, PartialEq)]
pub struct FregreCvResult {
    /// Candidate K values tested
    pub k_values: Vec<usize>,
    /// CV error for each K (parallel to `k_values`)
    pub cv_errors: Vec<f64>,
    /// Optimal K (minimizing CV error)
    pub optimal_k: usize,
    /// Minimum CV error
    pub min_cv_error: f64,
}
173
/// Criterion used for model selection (see [`model_selection_ncomp`]).
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum SelectionCriterion {
    /// Akaike Information Criterion
    Aic,
    /// Bayesian Information Criterion
    Bic,
    /// Generalized Cross-Validation
    Gcv,
}
184
/// Result of ncomp model selection (see [`model_selection_ncomp`]).
#[derive(Debug, Clone, PartialEq)]
pub struct ModelSelectionResult {
    /// Best number of FPC components by the chosen criterion
    pub best_ncomp: usize,
    /// (ncomp, AIC, BIC, GCV) for each candidate tried
    pub criteria: Vec<(usize, f64, f64, f64)>,
}
193
/// Result of bootstrap confidence intervals for β(t)
/// (see [`bootstrap_ci_fregre_lm`] and [`bootstrap_ci_functional_logistic`]).
#[derive(Debug, Clone, PartialEq)]
pub struct BootstrapCiResult {
    /// Pointwise lower bound (length m).
    pub lower: Vec<f64>,
    /// Pointwise upper bound (length m).
    pub upper: Vec<f64>,
    /// Original β(t) estimate (length m).
    pub center: Vec<f64>,
    /// Simultaneous lower bound (sup-norm adjusted, length m).
    pub sim_lower: Vec<f64>,
    /// Simultaneous upper bound (sup-norm adjusted, length m).
    pub sim_upper: Vec<f64>,
    /// Number of bootstrap replicates that converged (may be fewer than requested).
    pub n_boot_success: usize,
}
210
/// Result of lambda selection for basis regression via cross-validation
/// (see [`fregre_basis_cv`]).
#[derive(Debug, Clone, PartialEq)]
pub struct FregreBasisCvResult {
    /// Optimal smoothing parameter lambda.
    pub optimal_lambda: f64,
    /// Mean CV error for each lambda (parallel to `lambda_values`).
    pub cv_errors: Vec<f64>,
    /// SE of CV error across folds for each lambda (parallel to `lambda_values`).
    pub cv_se: Vec<f64>,
    /// Lambda values tested.
    pub lambda_values: Vec<f64>,
    /// Minimum mean CV error.
    pub min_cv_error: f64,
}
225
/// Result of bandwidth selection for nonparametric regression via CV
/// (see [`fregre_np_cv`]).
#[derive(Debug, Clone, PartialEq)]
pub struct FregreNpCvResult {
    /// Optimal bandwidth.
    pub optimal_h: f64,
    /// Mean CV error for each bandwidth (parallel to `h_values`).
    pub cv_errors: Vec<f64>,
    /// SE of CV error across folds for each bandwidth (parallel to `h_values`).
    pub cv_se: Vec<f64>,
    /// Bandwidth values tested.
    pub h_values: Vec<f64>,
    /// Minimum mean CV error.
    pub min_cv_error: f64,
}
240
241// ---------------------------------------------------------------------------
242// Shared linear algebra helpers
243// ---------------------------------------------------------------------------
244
245/// Compute X'X (symmetric, p×p stored flat row-major).
246pub(crate) fn compute_xtx(x: &FdMatrix) -> Vec<f64> {
247    let (n, p) = x.shape();
248    let mut xtx = vec![0.0; p * p];
249    for k in 0..p {
250        for j in k..p {
251            let mut s = 0.0;
252            for i in 0..n {
253                s += x[(i, k)] * x[(i, j)];
254            }
255            xtx[k * p + j] = s;
256            xtx[j * p + k] = s;
257        }
258    }
259    xtx
260}
261
262/// Compute X'y (length p).
263fn compute_xty(x: &FdMatrix, y: &[f64]) -> Vec<f64> {
264    let (n, p) = x.shape();
265    (0..p)
266        .map(|k| {
267            let mut s = 0.0;
268            for i in 0..n {
269                s += x[(i, k)] * y[i];
270            }
271            s
272        })
273        .collect()
274}
275
276/// Cholesky factorization: A = LL'. Returns L (p×p flat row-major) or error if singular.
277pub(crate) fn cholesky_factor(a: &[f64], p: usize) -> Result<Vec<f64>, FdarError> {
278    let mut l = vec![0.0; p * p];
279    for j in 0..p {
280        let mut diag = a[j * p + j];
281        for k in 0..j {
282            diag -= l[j * p + k] * l[j * p + k];
283        }
284        if diag <= 1e-12 {
285            return Err(FdarError::ComputationFailed {
286                operation: "Cholesky factorization",
287                detail: "matrix is singular or near-singular; try reducing ncomp or check for collinear FPC scores".into(),
288            });
289        }
290        l[j * p + j] = diag.sqrt();
291        for i in (j + 1)..p {
292            let mut s = a[i * p + j];
293            for k in 0..j {
294                s -= l[i * p + k] * l[j * p + k];
295            }
296            l[i * p + j] = s / l[j * p + j];
297        }
298    }
299    Ok(l)
300}
301
/// Solve Lz = b (forward substitution) then L'x = z (back substitution).
/// L is p×p flat row-major; the solution overwrites a copy of b in place.
pub(crate) fn cholesky_forward_back(l: &[f64], b: &[f64], p: usize) -> Vec<f64> {
    let mut sol = b.to_vec();
    // Forward pass: L z = b.
    for row in 0..p {
        let partial: f64 = (0..row).map(|k| l[row * p + k] * sol[k]).sum();
        sol[row] = (sol[row] - partial) / l[row * p + row];
    }
    // Back pass: L' x = z, reading L'(i,k) as L(k,i).
    for row in (0..p).rev() {
        let partial: f64 = ((row + 1)..p).map(|k| l[k * p + row] * sol[k]).sum();
        sol[row] = (sol[row] - partial) / l[row * p + row];
    }
    sol
}
319
320/// Solve Ax = b via Cholesky decomposition (A must be symmetric positive definite).
321pub(super) fn cholesky_solve(a: &[f64], b: &[f64], p: usize) -> Result<Vec<f64>, FdarError> {
322    let l = cholesky_factor(a, p)?;
323    Ok(cholesky_forward_back(&l, b, p))
324}
325
326/// Compute hat matrix diagonal: H_ii = x_i' (X'X)^{-1} x_i, given Cholesky factor L of X'X.
327pub(crate) fn compute_hat_diagonal(x: &FdMatrix, l: &[f64]) -> Vec<f64> {
328    let (n, p) = x.shape();
329    let mut hat_diag = vec![0.0; n];
330    for i in 0..n {
331        let mut v = vec![0.0; p];
332        for j in 0..p {
333            v[j] = x[(i, j)];
334            for k in 0..j {
335                v[j] -= l[j * p + k] * v[k];
336            }
337            v[j] /= l[j * p + j];
338        }
339        hat_diag[i] = v.iter().map(|vi| vi * vi).sum();
340    }
341    hat_diag
342}
343
/// Compute diagonal of (X'X)^{-1} given Cholesky factor L, then
/// SE_j = sqrt(sigma² · [(X'X)^{-1}]_jj).
///
/// Uses [(X'X)^{-1}]_jj = ||L^{-1} e_j||²: forward-solve L v = e_j and sum
/// the squares of v.
fn compute_ols_std_errors(l: &[f64], p: usize, sigma2: f64) -> Vec<f64> {
    let mut se = vec![0.0; p];
    for j in 0..p {
        let mut v = vec![0.0; p];
        // Forward substitution with RHS e_j: rows 0..j of the solution are
        // provably zero, so substitution starts directly at row j (this
        // skips roughly half the work of a full forward solve).
        v[j] = 1.0 / l[j * p + j];
        for k in (j + 1)..p {
            let mut s = 0.0;
            for kk in j..k {
                s -= l[k * p + kk] * v[kk];
            }
            v[k] = s / l[k * p + k];
        }
        se[j] = (sigma2 * v.iter().map(|vi| vi * vi).sum::<f64>()).sqrt();
    }
    se
}
360
361// ---------------------------------------------------------------------------
362// Design matrix and coefficient recovery
363// ---------------------------------------------------------------------------
364
365/// Build design matrix: \[1, ξ_1, ..., ξ_K, z_1, ..., z_p\].
366/// Validate inputs for fregre_lm / functional_logistic.
367fn validate_fregre_inputs(
368    n: usize,
369    m: usize,
370    y: &[f64],
371    scalar_covariates: Option<&FdMatrix>,
372) -> Result<(), FdarError> {
373    if n < 3 {
374        return Err(FdarError::InvalidDimension {
375            parameter: "data",
376            expected: "at least 3 rows".to_string(),
377            actual: format!("{n}"),
378        });
379    }
380    if m == 0 {
381        return Err(FdarError::InvalidDimension {
382            parameter: "data",
383            expected: "at least 1 column".to_string(),
384            actual: "0".to_string(),
385        });
386    }
387    if y.len() != n {
388        return Err(FdarError::InvalidDimension {
389            parameter: "y",
390            expected: format!("{n}"),
391            actual: format!("{}", y.len()),
392        });
393    }
394    if let Some(sc) = scalar_covariates {
395        if sc.nrows() != n {
396            return Err(FdarError::InvalidDimension {
397                parameter: "scalar_covariates",
398                expected: format!("{n} rows"),
399                actual: format!("{} rows", sc.nrows()),
400            });
401        }
402    }
403    Ok(())
404}
405
406/// Resolve ncomp: auto-select via CV if 0, otherwise clamp.
407fn resolve_ncomp(
408    ncomp: usize,
409    data: &FdMatrix,
410    y: &[f64],
411    scalar_covariates: Option<&FdMatrix>,
412    n: usize,
413    m: usize,
414) -> Result<usize, FdarError> {
415    if ncomp == 0 {
416        let cv = fregre_cv(data, y, scalar_covariates, 1, m.min(n - 1).min(20), 5)?;
417        Ok(cv.optimal_k)
418    } else {
419        Ok(ncomp.min(n - 1).min(m))
420    }
421}
422
423pub(crate) fn build_design_matrix(
424    fpca_scores: &FdMatrix,
425    ncomp: usize,
426    scalar_covariates: Option<&FdMatrix>,
427    n: usize,
428) -> FdMatrix {
429    let p_scalar = scalar_covariates.map_or(0, super::matrix::FdMatrix::ncols);
430    let p_total = 1 + ncomp + p_scalar;
431    let mut design = FdMatrix::zeros(n, p_total);
432    for i in 0..n {
433        design[(i, 0)] = 1.0;
434        for k in 0..ncomp {
435            design[(i, 1 + k)] = fpca_scores[(i, k)];
436        }
437        if let Some(sc) = scalar_covariates {
438            for j in 0..p_scalar {
439                design[(i, 1 + ncomp + j)] = sc[(i, j)];
440            }
441        }
442    }
443    design
444}
445
446/// Recover functional coefficient β(t) = Σ_k γ_k φ_k(t).
447fn recover_beta_t(fpc_coeffs: &[f64], rotation: &FdMatrix, m: usize) -> Vec<f64> {
448    let ncomp = fpc_coeffs.len();
449    let mut beta_t = vec![0.0; m];
450    for k in 0..ncomp {
451        for j in 0..m {
452            beta_t[j] += fpc_coeffs[k] * rotation[(j, k)];
453        }
454    }
455    beta_t
456}
457
458/// Pointwise standard error of β(t) via error propagation through FPCA rotation.
459///
460/// SE[β(t_j)]² = Σ_k φ_k(t_j)² · SE[γ_k]²
461fn compute_beta_se(gamma_se: &[f64], rotation: &FdMatrix, m: usize) -> Vec<f64> {
462    let ncomp = gamma_se.len();
463    let mut beta_se = vec![0.0; m];
464    for j in 0..m {
465        let mut var_j = 0.0;
466        for k in 0..ncomp {
467            var_j += rotation[(j, k)].powi(2) * gamma_se[k].powi(2);
468        }
469        beta_se[j] = var_j.sqrt();
470    }
471    beta_se
472}
473
474/// Compute fitted values ŷ = X β.
475fn compute_fitted(design: &FdMatrix, coeffs: &[f64]) -> Vec<f64> {
476    let (n, p) = design.shape();
477    (0..n)
478        .map(|i| {
479            let mut yhat = 0.0;
480            for j in 0..p {
481                yhat += design[(i, j)] * coeffs[j];
482            }
483            yhat
484        })
485        .collect()
486}
487
/// Compute R² and adjusted R².
///
/// `p_total` counts all regression parameters including the intercept, so
/// the model degrees of freedom are `p_total - 1`.
///
/// Edge cases: a constant response (zero total sum of squares) yields
/// R² = 0, and when the adjustment denominator n - df - 1 is non-positive
/// the plain R² is returned as the adjusted value.
fn compute_r_squared(y: &[f64], residuals: &[f64], p_total: usize) -> (f64, f64) {
    let n = y.len();
    let y_mean = y.iter().sum::<f64>() / n as f64;
    let ss_tot: f64 = y.iter().map(|&yi| (yi - y_mean).powi(2)).sum();
    let ss_res: f64 = residuals.iter().map(|r| r * r).sum();
    let r_squared = if ss_tot > 0.0 {
        1.0 - ss_res / ss_tot
    } else {
        0.0
    };
    // saturating_sub: `p_total - 1` would underflow (usize) if a caller ever
    // passed p_total = 0.
    let df_model = p_total.saturating_sub(1) as f64;
    let r_squared_adj = if n as f64 - df_model - 1.0 > 0.0 {
        1.0 - (1.0 - r_squared) * (n as f64 - 1.0) / (n as f64 - df_model - 1.0)
    } else {
        r_squared
    };
    (r_squared, r_squared_adj)
}
507
508// ---------------------------------------------------------------------------
509// OLS solver
510// ---------------------------------------------------------------------------
511
512/// Solve ordinary least squares: min ||Xb - y||² via normal equations with Cholesky.
513/// Returns (coefficients, hat_matrix_diagonal) or error if singular.
514fn ols_solve(x: &FdMatrix, y: &[f64]) -> Result<(Vec<f64>, Vec<f64>), FdarError> {
515    let (n, p) = x.shape();
516    if n < p || p == 0 {
517        return Err(FdarError::InvalidDimension {
518            parameter: "design matrix",
519            expected: format!("n >= p and p > 0 (p={p})"),
520            actual: format!("n={n}, p={p}"),
521        });
522    }
523    let xtx = compute_xtx(x);
524    let xty = compute_xty(x, y);
525    let l = cholesky_factor(&xtx, p)?;
526    let b = cholesky_forward_back(&l, &xty, p);
527    let hat_diag = compute_hat_diagonal(x, &l);
528    Ok((b, hat_diag))
529}
530
/// Sigmoid function: 1 / (1 + exp(-x)).
///
/// Numerically stable: `exp` is only ever evaluated on a non-positive
/// argument, so it can never overflow (it underflows gracefully to 0).
pub(crate) fn sigmoid(x: f64) -> f64 {
    let e = (-x.abs()).exp();
    if x >= 0.0 {
        1.0 / (1.0 + e)
    } else {
        e / (1.0 + e)
    }
}
540
541// ---------------------------------------------------------------------------
542// Predict methods on result structs
543// ---------------------------------------------------------------------------
544
impl FregreLmResult {
    /// Predict new responses for curves in `new_data` (and optional scalar
    /// covariates). Delegates to [`predict_fregre_lm`].
    pub fn predict(&self, new_data: &FdMatrix, new_scalar: Option<&FdMatrix>) -> Vec<f64> {
        predict_fregre_lm(self, new_data, new_scalar)
    }
}
551
impl FregreRobustResult {
    /// Predict new responses for curves in `new_data` (and optional scalar
    /// covariates). Delegates to [`predict_fregre_robust`].
    pub fn predict(&self, new_data: &FdMatrix, new_scalar: Option<&FdMatrix>) -> Vec<f64> {
        predict_fregre_robust(self, new_data, new_scalar)
    }
}
558
impl FunctionalLogisticResult {
    /// Predict P(Y=1) for new data (and optional scalar covariates).
    /// Delegates to [`predict_functional_logistic`].
    pub fn predict(&self, new_data: &FdMatrix, new_scalar: Option<&FdMatrix>) -> Vec<f64> {
        predict_functional_logistic(self, new_data, new_scalar)
    }
}
564}