//! FPC-based scalar-on-function regression (`fregre_lm`), cross-validated
//! component selection (`fregre_cv`), information-criterion model selection,
//! and prediction for fitted functional linear models.
use super::*;

// ---------------------------------------------------------------------------
// fregre_lm: FPC-based functional linear model
// ---------------------------------------------------------------------------

7/// Functional linear model with optional scalar covariates.
8///
9/// Fits the model: `y = α + Σ_k γ_k ξ_k + γ_z' z + ε`
10/// where ξ_k are FPC scores of the functional predictor X(t) and z are scalar covariates.
11/// The functional coefficient is recovered as `β(t) = Σ_k γ_k φ_k(t)`.
12///
13/// # Arguments
14/// * `data` - Functional predictor matrix (n × m)
15/// * `y` - Scalar response vector (length n)
16/// * `scalar_covariates` - Optional scalar covariates matrix (n × p), `None` for pure functional model
17/// * `ncomp` - Number of FPC components (if 0, selected by GCV)
18///
19/// # Errors
20///
21/// Returns [`FdarError::InvalidDimension`] if `data` has fewer than 3 rows,
22/// zero columns, `y.len() != n`, or `scalar_covariates` row count differs from `n`.
23/// Returns [`FdarError::InvalidParameter`] if auto-selected `ncomp` via
24/// [`fregre_cv`] fails due to invalid range, or if `ncomp` passed to FPCA is zero.
25/// Returns [`FdarError::ComputationFailed`] if the SVD inside FPCA fails to
26/// extract components, or if the OLS normal equations (X'X) are singular.
27///
28/// # Returns
29/// [`FregreLmResult`] with estimated coefficients, fitted values, and diagnostics
30#[must_use = "expensive computation whose result should not be discarded"]
31pub fn fregre_lm(
32    data: &FdMatrix,
33    y: &[f64],
34    scalar_covariates: Option<&FdMatrix>,
35    ncomp: usize,
36) -> Result<FregreLmResult, FdarError> {
37    let (n, m) = data.shape();
38    validate_fregre_inputs(n, m, y, scalar_covariates)?;
39
40    let ncomp = resolve_ncomp(ncomp, data, y, scalar_covariates, n, m)?;
41
42    let fpca = fdata_to_pc_1d(data, ncomp)?;
43    let design = build_design_matrix(&fpca.scores, ncomp, scalar_covariates, n);
44    let p_total = design.ncols();
45    let (coeffs, hat_diag) = ols_solve(&design, y)?;
46
47    let fitted_values = compute_fitted(&design, &coeffs);
48    let residuals: Vec<f64> = y
49        .iter()
50        .zip(&fitted_values)
51        .map(|(&yi, &yh)| yi - yh)
52        .collect();
53    let (r_squared, r_squared_adj) = compute_r_squared(y, &residuals, p_total);
54
55    let ss_res: f64 = residuals.iter().map(|r| r * r).sum();
56    let df_resid = (n as f64 - p_total as f64).max(1.0);
57    let residual_se = (ss_res / df_resid).sqrt();
58    let sigma2 = ss_res / df_resid;
59
60    let xtx = compute_xtx(&design);
61    let l = cholesky_factor(&xtx, p_total).unwrap_or_else(|_| vec![1.0; p_total * p_total]);
62    let std_errors = compute_ols_std_errors(&l, p_total, sigma2);
63
64    let gcv = hat_diag
65        .iter()
66        .zip(&residuals)
67        .map(|(&h, &r)| (r / (1.0 - h).max(1e-10)).powi(2))
68        .sum::<f64>()
69        / n as f64;
70
71    let beta_t = recover_beta_t(&coeffs[1..1 + ncomp], &fpca.rotation, m);
72    let beta_se = compute_beta_se(&std_errors[1..1 + ncomp], &fpca.rotation, m);
73    let gamma: Vec<f64> = coeffs[1 + ncomp..].to_vec();
74
75    let nf = n as f64;
76    let rss = ss_res;
77    let aic = nf * (rss / nf).ln() + 2.0 * p_total as f64;
78    let bic = nf * (rss / nf).ln() + (nf).ln() * p_total as f64;
79
80    Ok(FregreLmResult {
81        intercept: coeffs[0],
82        beta_t,
83        beta_se,
84        gamma,
85        fitted_values,
86        residuals,
87        r_squared,
88        r_squared_adj,
89        std_errors,
90        ncomp,
91        fpca,
92        coefficients: coeffs,
93        residual_se,
94        gcv,
95        aic,
96        bic,
97    })
98}

// ---------------------------------------------------------------------------
// fregre_cv: Cross-validation for K selection
// ---------------------------------------------------------------------------

104/// Copy a subset of rows from src into dst.
105fn copy_rows(dst: &mut FdMatrix, src: &FdMatrix, src_rows: &[usize]) {
106    let ncols = src.ncols();
107    for (dst_i, &src_i) in src_rows.iter().enumerate() {
108        for j in 0..ncols {
109            dst[(dst_i, j)] = src[(src_i, j)];
110        }
111    }
112}
114/// Compute CV error for a single K across all folds (randomized assignment).
115fn cv_error_for_k(
116    data: &FdMatrix,
117    y: &[f64],
118    scalar_covariates: Option<&FdMatrix>,
119    k: usize,
120    n_folds: usize,
121    folds: &[usize],
122) -> Result<f64, FdarError> {
123    let n = data.nrows();
124    let ncols = data.ncols();
125    let mut total_error = 0.0;
126    let mut count = 0;
127
128    for fold in 0..n_folds {
129        let train_idx: Vec<usize> = (0..n).filter(|&i| folds[i] != fold).collect();
130        let test_idx: Vec<usize> = (0..n).filter(|&i| folds[i] == fold).collect();
131        let n_test = test_idx.len();
132        if n_test == 0 || train_idx.len() < k + 2 {
133            continue;
134        }
135
136        let mut train_data = FdMatrix::zeros(train_idx.len(), ncols);
137        let mut test_data = FdMatrix::zeros(n_test, ncols);
138        copy_rows(&mut train_data, data, &train_idx);
139        copy_rows(&mut test_data, data, &test_idx);
140
141        let train_y: Vec<f64> = train_idx.iter().map(|&i| y[i]).collect();
142        let test_y: Vec<f64> = test_idx.iter().map(|&i| y[i]).collect();
143
144        let train_sc = scalar_covariates.map(|sc| {
145            let mut m = FdMatrix::zeros(train_idx.len(), sc.ncols());
146            copy_rows(&mut m, sc, &train_idx);
147            m
148        });
149        let test_sc = scalar_covariates.map(|sc| {
150            let mut m = FdMatrix::zeros(n_test, sc.ncols());
151            copy_rows(&mut m, sc, &test_idx);
152            m
153        });
154
155        let fit = match fregre_lm(&train_data, &train_y, train_sc.as_ref(), k) {
156            Ok(f) => f,
157            Err(_) => continue,
158        };
159
160        let predictions = predict_fregre_lm(&fit, &test_data, test_sc.as_ref());
161        let fold_mse: f64 = predictions
162            .iter()
163            .zip(&test_y)
164            .map(|(&yhat, &yi)| (yhat - yi).powi(2))
165            .sum::<f64>()
166            / n_test as f64;
167
168        total_error += fold_mse * n_test as f64;
169        count += n_test;
170    }
171
172    if count > 0 {
173        Ok(total_error / count as f64)
174    } else {
175        Err(FdarError::ComputationFailed {
176            operation: "CV error computation",
177            detail: "no valid folds produced predictions".to_string(),
178        })
179    }
180}
182/// K-fold cross-validation for selecting the number of FPC components.
183///
184/// # Arguments
185/// * `data` - Functional predictor matrix (n × m)
186/// * `y` - Scalar response vector (length n)
187/// * `scalar_covariates` - Optional scalar covariates matrix
188/// * `k_min` - Minimum number of components to test
189/// * `k_max` - Maximum number of components to test
190/// * `n_folds` - Number of CV folds
191///
192/// # Errors
193///
194/// Returns [`FdarError::InvalidDimension`] if `data` has fewer rows than `n_folds`.
195/// Returns [`FdarError::InvalidParameter`] if `k_min < 1` or `k_min > k_max`.
196/// Returns [`FdarError::ComputationFailed`] if no candidate K value produces a
197/// valid CV error across all folds.
198#[must_use = "expensive computation whose result should not be discarded"]
199pub fn fregre_cv(
200    data: &FdMatrix,
201    y: &[f64],
202    scalar_covariates: Option<&FdMatrix>,
203    k_min: usize,
204    k_max: usize,
205    n_folds: usize,
206) -> Result<FregreCvResult, FdarError> {
207    let n = data.nrows();
208    if n < n_folds {
209        return Err(FdarError::InvalidDimension {
210            parameter: "data",
211            expected: format!("at least {n_folds} rows (n_folds)"),
212            actual: format!("{n}"),
213        });
214    }
215    if k_min < 1 || k_min > k_max {
216        return Err(FdarError::InvalidParameter {
217            parameter: "k_min/k_max",
218            message: format!("k_min={k_min} must be >= 1 and <= k_max={k_max}"),
219        });
220    }
221
222    let k_max = k_max.min(n - 2).min(data.ncols());
223
224    // Use randomized fold assignment (consistent seed for reproducibility)
225    let folds = create_folds(n, n_folds, 42);
226
227    let mut k_values = Vec::new();
228    let mut cv_errors = Vec::new();
229
230    for k in k_min..=k_max {
231        if let Ok(err) = cv_error_for_k(data, y, scalar_covariates, k, n_folds, &folds) {
232            k_values.push(k);
233            cv_errors.push(err);
234        }
235    }
236
237    if k_values.is_empty() {
238        return Err(FdarError::ComputationFailed {
239            operation: "fregre_cv",
240            detail: "no valid K values produced CV errors".to_string(),
241        });
242    }
243
244    let (optimal_idx, &min_cv_error) = cv_errors
245        .iter()
246        .enumerate()
247        .min_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
248        .expect("checked: cv_errors is non-empty");
249
250    Ok(FregreCvResult {
251        k_values: k_values.clone(),
252        cv_errors,
253        optimal_k: k_values[optimal_idx],
254        min_cv_error,
255    })
256}

// ---------------------------------------------------------------------------
// Model selection
// ---------------------------------------------------------------------------

262/// Select optimal ncomp for `fregre_lm` using AIC, BIC, or GCV.
263///
264/// Fits models for `ncomp = 1..=max_ncomp`, collects information criteria,
265/// and returns the best ncomp under the chosen criterion.
266///
267/// # Arguments
268/// * `data` — Functional predictor matrix (n × m)
269/// * `y` — Scalar responses (length n)
270/// * `scalar_covariates` — Optional scalar covariates (n × p)
271/// * `max_ncomp` — Maximum number of FPC components to try
272/// * `criterion` — Which criterion to minimise
273///
274/// # Errors
275///
276/// Returns [`FdarError::InvalidParameter`] if `max_ncomp` is zero.
277/// Returns [`FdarError::ComputationFailed`] if no candidate ncomp produces a
278/// valid fitted model (all calls to [`fregre_lm`] fail).
279#[must_use = "expensive computation whose result should not be discarded"]
280pub fn model_selection_ncomp(
281    data: &FdMatrix,
282    y: &[f64],
283    scalar_covariates: Option<&FdMatrix>,
284    max_ncomp: usize,
285    criterion: SelectionCriterion,
286) -> Result<ModelSelectionResult, FdarError> {
287    if max_ncomp == 0 {
288        return Err(FdarError::InvalidParameter {
289            parameter: "max_ncomp",
290            message: "must be >= 1".to_string(),
291        });
292    }
293
294    let mut criteria = Vec::with_capacity(max_ncomp);
295
296    for k in 1..=max_ncomp {
297        if let Ok(fit) = fregre_lm(data, y, scalar_covariates, k) {
298            criteria.push((k, fit.aic, fit.bic, fit.gcv));
299        }
300    }
301
302    if criteria.is_empty() {
303        return Err(FdarError::ComputationFailed {
304            operation: "model_selection_ncomp",
305            detail: "no valid models could be fitted".to_string(),
306        });
307    }
308
309    let best_idx = match criterion {
310        SelectionCriterion::Aic => criteria
311            .iter()
312            .enumerate()
313            .min_by(|(_, a), (_, b)| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal))
314            .map_or(0, |(i, _)| i),
315        SelectionCriterion::Bic => criteria
316            .iter()
317            .enumerate()
318            .min_by(|(_, a), (_, b)| a.2.partial_cmp(&b.2).unwrap_or(std::cmp::Ordering::Equal))
319            .map_or(0, |(i, _)| i),
320        SelectionCriterion::Gcv => criteria
321            .iter()
322            .enumerate()
323            .min_by(|(_, a), (_, b)| a.3.partial_cmp(&b.3).unwrap_or(std::cmp::Ordering::Equal))
324            .map_or(0, |(i, _)| i),
325    };
326
327    Ok(ModelSelectionResult {
328        best_ncomp: criteria[best_idx].0,
329        criteria,
330    })
331}
333/// Predict new responses using a fitted functional linear model.
334///
335/// # Arguments
336/// * `fit` - A fitted [`FregreLmResult`]
337/// * `new_data` - New functional predictor matrix (n_new × m)
338/// * `new_scalar` - Optional new scalar covariates (n_new × p)
339pub fn predict_fregre_lm(
340    fit: &FregreLmResult,
341    new_data: &FdMatrix,
342    new_scalar: Option<&FdMatrix>,
343) -> Vec<f64> {
344    let (n_new, m) = new_data.shape();
345    let ncomp = fit.ncomp;
346    let p_scalar = fit.gamma.len();
347
348    let mut predictions = vec![0.0; n_new];
349    for i in 0..n_new {
350        let mut yhat = fit.intercept;
351        for k in 0..ncomp {
352            let mut s = 0.0;
353            for j in 0..m {
354                s += (new_data[(i, j)] - fit.fpca.mean[j]) * fit.fpca.rotation[(j, k)];
355            }
356            yhat += fit.coefficients[1 + k] * s;
357        }
358        if let Some(sc) = new_scalar {
359            for j in 0..p_scalar {
360                yhat += fit.gamma[j] * sc[(i, j)];
361            }
362        }
363        predictions[i] = yhat;
364    }
365    predictions
366}