// fdars_core/scalar_on_function/robust.rs
//
// Robust (L1 and Huber) scalar-on-function regression via FPCA + IRLS.

1use super::{
2    build_design_matrix, cholesky_solve, compute_fitted, compute_r_squared, recover_beta_t,
3    resolve_ncomp, validate_fregre_inputs, FregreRobustResult,
4};
5use crate::error::FdarError;
6use crate::matrix::FdMatrix;
7use crate::regression::fdata_to_pc_1d;
8
/// Maximum number of IRLS sweeps before giving up; if reached, the result
/// is still returned, but with `converged == false`.
const MAX_ITER: usize = 100;
/// Convergence tolerance on the relative change of the coefficient vector
/// between successive IRLS refits (see `relative_change`).
const CONV_TOL: f64 = 1e-6;
/// Epsilon floor applied to |residual| to avoid division by zero when
/// forming the L1 weights w = 1/|r|.
const L1_EPS: f64 = 1e-6;
15
16// ---------------------------------------------------------------------------
17// Weighted least squares helper
18// ---------------------------------------------------------------------------
19
20/// Compute X'WX (symmetric, p×p stored flat row-major).
21fn compute_xtwx(x: &FdMatrix, w: &[f64]) -> Vec<f64> {
22    let (n, p) = x.shape();
23    let mut xtwx = vec![0.0; p * p];
24    for k in 0..p {
25        for j in k..p {
26            let mut s = 0.0;
27            for i in 0..n {
28                s += w[i] * x[(i, k)] * x[(i, j)];
29            }
30            xtwx[k * p + j] = s;
31            xtwx[j * p + k] = s;
32        }
33    }
34    xtwx
35}
36
37/// Compute X'Wy (length p).
38fn compute_xtwy(x: &FdMatrix, y: &[f64], w: &[f64]) -> Vec<f64> {
39    let (n, p) = x.shape();
40    (0..p)
41        .map(|k| {
42            let mut s = 0.0;
43            for i in 0..n {
44                s += w[i] * x[(i, k)] * y[i];
45            }
46            s
47        })
48        .collect()
49}
50
51/// Solve weighted least squares: min Σ w_i (y_i - x_i'β)² via normal equations.
52fn wls_solve(x: &FdMatrix, y: &[f64], w: &[f64]) -> Result<Vec<f64>, FdarError> {
53    let p = x.ncols();
54    let xtwx = compute_xtwx(x, w);
55    let xtwy = compute_xtwy(x, y, w);
56    cholesky_solve(&xtwx, &xtwy, p)
57}
58
59/// Solve unweighted OLS via Cholesky (for initialization).
60fn ols_init(x: &FdMatrix, y: &[f64]) -> Result<Vec<f64>, FdarError> {
61    let n = x.nrows();
62    let w = vec![1.0; n];
63    wls_solve(x, y, &w)
64}
65
/// Relative change of the coefficient vector between two IRLS steps:
/// ||β_new − β_old|| / (||β_old|| + 1e-12). The small additive constant
/// keeps the ratio finite when β_old is the zero vector.
fn relative_change(beta_new: &[f64], beta_old: &[f64]) -> f64 {
    let mut diff_sq = 0.0;
    for (&a, &b) in beta_new.iter().zip(beta_old) {
        diff_sq += (a - b).powi(2);
    }
    let mut old_sq = 0.0;
    for &b in beta_old {
        old_sq += b * b;
    }
    diff_sq.sqrt() / (old_sq.sqrt() + 1e-12)
}
77
78// ---------------------------------------------------------------------------
79// L1 (Least Absolute Deviations) regression
80// ---------------------------------------------------------------------------
81
82/// L1 (median) functional regression via FPCA + IRLS.
83///
84/// Uses iteratively reweighted least squares to minimise
85/// `Σ|y_i - x_i'β|` instead of `Σ(y_i - x_i'β)²`.
86/// More robust to outliers than OLS.
87///
88/// # Arguments
89/// * `data` - Functional predictor matrix (n × m)
90/// * `y` - Scalar response vector (length n)
91/// * `scalar_covariates` - Optional scalar covariates matrix (n × p)
92/// * `ncomp` - Number of FPC components (if 0, selected by GCV)
93///
94/// # Errors
95///
96/// Returns [`FdarError::InvalidDimension`] if `data` has fewer than 3 rows,
97/// zero columns, `y.len() != n`, or `scalar_covariates` row count differs from `n`.
98/// Returns [`FdarError::InvalidParameter`] if auto-selected `ncomp` via CV fails.
99/// Returns [`FdarError::ComputationFailed`] if FPCA fails or the normal equations
100/// are singular.
101///
102/// # Examples
103///
104/// ```
105/// use fdars_core::matrix::FdMatrix;
106/// use fdars_core::scalar_on_function::fregre_l1;
107///
108/// let (n, m) = (20, 30);
109/// let data = FdMatrix::from_column_major(
110///     (0..n * m).map(|k| {
111///         let i = (k % n) as f64;
112///         let j = (k / n) as f64;
113///         ((i + 1.0) * j * 0.2).sin()
114///     }).collect(),
115///     n, m,
116/// ).unwrap();
117/// let y: Vec<f64> = (0..n).map(|i| (i as f64 * 0.5).sin()).collect();
118/// let fit = fregre_l1(&data, &y, None, 3).unwrap();
119/// assert_eq!(fit.fitted_values.len(), 20);
120/// assert_eq!(fit.beta_t.len(), 30);
121/// ```
122#[must_use = "expensive computation whose result should not be discarded"]
123pub fn fregre_l1(
124    data: &FdMatrix,
125    y: &[f64],
126    scalar_covariates: Option<&FdMatrix>,
127    ncomp: usize,
128) -> Result<FregreRobustResult, FdarError> {
129    fregre_robust_irls(data, y, scalar_covariates, ncomp, RobustType::L1)
130}
131
132// ---------------------------------------------------------------------------
133// Huber M-estimation
134// ---------------------------------------------------------------------------
135
136/// Huber M-estimation functional regression via FPCA + IRLS.
137///
138/// Uses the Huber loss: `L(r) = r²/2` for `|r| ≤ k`, `k|r| - k²/2` for `|r| > k`.
139/// Combines the efficiency of OLS for small residuals with the robustness of L1
140/// for large residuals.
141///
142/// # Arguments
143/// * `data` - Functional predictor matrix (n × m)
144/// * `y` - Scalar response vector (length n)
145/// * `scalar_covariates` - Optional scalar covariates matrix (n × p)
146/// * `ncomp` - Number of FPC components (if 0, selected by GCV)
147/// * `k` - Huber tuning constant (default 1.345 for 95% asymptotic efficiency
148///   relative to OLS under Gaussian errors)
149///
150/// # Errors
151///
152/// Returns [`FdarError::InvalidDimension`] if `data` has fewer than 3 rows,
153/// zero columns, `y.len() != n`, or `scalar_covariates` row count differs from `n`.
154/// Returns [`FdarError::InvalidParameter`] if `k <= 0` or auto-selected `ncomp`
155/// via CV fails.
156/// Returns [`FdarError::ComputationFailed`] if FPCA fails or the normal equations
157/// are singular.
158///
159/// # Examples
160///
161/// ```
162/// use fdars_core::matrix::FdMatrix;
163/// use fdars_core::scalar_on_function::fregre_huber;
164///
165/// let (n, m) = (20, 30);
166/// let data = FdMatrix::from_column_major(
167///     (0..n * m).map(|k| {
168///         let i = (k % n) as f64;
169///         let j = (k / n) as f64;
170///         ((i + 1.0) * j * 0.2).sin()
171///     }).collect(),
172///     n, m,
173/// ).unwrap();
174/// let y: Vec<f64> = (0..n).map(|i| (i as f64 * 0.5).sin()).collect();
175/// let fit = fregre_huber(&data, &y, None, 3, 1.345).unwrap();
176/// assert_eq!(fit.fitted_values.len(), 20);
177/// assert!(fit.converged);
178/// ```
179#[must_use = "expensive computation whose result should not be discarded"]
180pub fn fregre_huber(
181    data: &FdMatrix,
182    y: &[f64],
183    scalar_covariates: Option<&FdMatrix>,
184    ncomp: usize,
185    k: f64,
186) -> Result<FregreRobustResult, FdarError> {
187    if k <= 0.0 {
188        return Err(FdarError::InvalidParameter {
189            parameter: "k",
190            message: format!("Huber tuning constant must be positive, got {k}"),
191        });
192    }
193    fregre_robust_irls(data, y, scalar_covariates, ncomp, RobustType::Huber(k))
194}
195
196// ---------------------------------------------------------------------------
197// Unified IRLS engine
198// ---------------------------------------------------------------------------
199
/// Internal selector for the robust weight function used by the IRLS engine.
enum RobustType {
    /// Least-absolute-deviations loss: weight w_i = 1 / max(|r_i|, L1_EPS).
    L1,
    /// Huber loss with tuning constant k: w_i = 1 when |r_i| ≤ k,
    /// otherwise k / |r_i|.
    Huber(f64),
}
205
206/// Compute IRLS weights from residuals.
207fn compute_weights(residuals: &[f64], robust_type: &RobustType) -> Vec<f64> {
208    match robust_type {
209        RobustType::L1 => residuals
210            .iter()
211            .map(|&r| 1.0 / r.abs().max(L1_EPS))
212            .collect(),
213        RobustType::Huber(k) => residuals
214            .iter()
215            .map(|&r| {
216                let abs_r = r.abs();
217                if abs_r <= *k {
218                    1.0
219                } else {
220                    *k / abs_r
221                }
222            })
223            .collect(),
224    }
225}
226
/// Unified IRLS (iteratively reweighted least squares) engine shared by
/// [`fregre_l1`] and [`fregre_huber`].
///
/// Pipeline: validate inputs → resolve `ncomp` → FPCA of `data` → build the
/// design matrix `[1, FPC scores, scalar covariates]` → OLS start → IRLS
/// until the relative coefficient change drops below [`CONV_TOL`] or
/// [`MAX_ITER`] sweeps elapse → assemble the result (fitted values,
/// residuals, final weights, R², and β(t) recovered on the original grid).
fn fregre_robust_irls(
    data: &FdMatrix,
    y: &[f64],
    scalar_covariates: Option<&FdMatrix>,
    ncomp: usize,
    robust_type: RobustType,
) -> Result<FregreRobustResult, FdarError> {
    let (n, m) = data.shape();
    validate_fregre_inputs(n, m, y, scalar_covariates)?;

    // `ncomp == 0` means "auto": component count is chosen by CV/GCV.
    let ncomp = resolve_ncomp(ncomp, data, y, scalar_covariates, n, m)?;

    // Step 1: FPCA — reduce the functional predictor to `ncomp` scores.
    let fpca = fdata_to_pc_1d(data, ncomp)?;

    // Step 2: Build design matrix [1, scores, scalar_covariates].
    let design = build_design_matrix(&fpca.scores, ncomp, scalar_covariates, n);
    let p_total = design.ncols();

    // Step 3: Initialize with the OLS solution (unit weights).
    let mut coeffs = ols_init(&design, y)?;

    // Step 4: IRLS loop — alternate (residuals → weights → WLS refit)
    // until the coefficient vector stabilises.
    let mut iterations = 0;
    let mut converged = false;
    let mut weights = vec![1.0; n];

    for iter in 0..MAX_ITER {
        // Residuals under the current coefficients.
        let fitted = compute_fitted(&design, &coeffs);
        let residuals: Vec<f64> = y.iter().zip(&fitted).map(|(&yi, &yh)| yi - yh).collect();

        // Robust weights from the current residuals.
        weights = compute_weights(&residuals, &robust_type);

        // Refit by weighted least squares.
        let new_coeffs = wls_solve(&design, y, &weights)?;

        // Convergence check on the relative coefficient change.
        let rel_change = relative_change(&new_coeffs, &coeffs);
        coeffs = new_coeffs;
        iterations = iter + 1;

        if rel_change < CONV_TOL {
            converged = true;
            break;
        }
    }

    // Step 5: Final fitted values and residuals for the returned fit.
    let fitted_values = compute_fitted(&design, &coeffs);
    let residuals: Vec<f64> = y
        .iter()
        .zip(&fitted_values)
        .map(|(&yi, &yh)| yi - yh)
        .collect();

    // Recompute the weights from the *final* residuals so the reported
    // weights correspond to the returned coefficients (the loop's last
    // weights were computed one refit earlier).
    weights = compute_weights(&residuals, &robust_type);

    let (r_squared, _) = compute_r_squared(y, &residuals, p_total);

    // Recover β(t) on the original grid from the FPC slope coefficients.
    // Layout of `coeffs`: [intercept, ncomp FPC slopes, scalar covariates].
    let beta_t = recover_beta_t(&coeffs[1..=ncomp], &fpca.rotation, m);

    Ok(FregreRobustResult {
        intercept: coeffs[0],
        beta_t,
        fitted_values,
        residuals,
        coefficients: coeffs,
        ncomp,
        fpca,
        iterations,
        converged,
        weights,
        r_squared,
    })
}
307
308// ---------------------------------------------------------------------------
309// Prediction
310// ---------------------------------------------------------------------------
311
312/// Predict new responses using a fitted robust functional regression model.
313///
314/// Projects `new_data` onto the FPCA basis stored in `fit`, then applies
315/// the estimated coefficients.
316///
317/// # Arguments
318/// * `fit` - A fitted [`FregreRobustResult`]
319/// * `new_data` - New functional predictor matrix (n_new × m)
320/// * `new_scalar` - Optional new scalar covariates (n_new × p)
321///
322/// # Examples
323///
324/// ```
325/// use fdars_core::matrix::FdMatrix;
326/// use fdars_core::scalar_on_function::{fregre_l1, predict_fregre_robust};
327///
328/// let (n, m) = (20, 30);
329/// let data = FdMatrix::from_column_major(
330///     (0..n * m).map(|k| {
331///         let i = (k % n) as f64;
332///         let j = (k / n) as f64;
333///         ((i + 1.0) * j * 0.2).sin()
334///     }).collect(),
335///     n, m,
336/// ).unwrap();
337/// let y: Vec<f64> = (0..n).map(|i| (i as f64 * 0.5).sin()).collect();
338/// let fit = fregre_l1(&data, &y, None, 3).unwrap();
339/// let preds = predict_fregre_robust(&fit, &data, None);
340/// assert_eq!(preds.len(), 20);
341/// ```
342pub fn predict_fregre_robust(
343    fit: &FregreRobustResult,
344    new_data: &FdMatrix,
345    new_scalar: Option<&FdMatrix>,
346) -> Vec<f64> {
347    let (n_new, m) = new_data.shape();
348    let ncomp = fit.ncomp;
349    let p_scalar = fit.coefficients.len() - 1 - ncomp;
350
351    let mut predictions = vec![0.0; n_new];
352    for i in 0..n_new {
353        let mut yhat = fit.intercept;
354        // Project onto FPC basis: ξ_k = Σ_j (x(t_j) - μ(t_j)) · φ_k(t_j)
355        for k in 0..ncomp {
356            let mut s = 0.0;
357            for j in 0..m {
358                s += (new_data[(i, j)] - fit.fpca.mean[j]) * fit.fpca.rotation[(j, k)];
359            }
360            yhat += fit.coefficients[1 + k] * s;
361        }
362        // Add scalar covariates
363        if let Some(sc) = new_scalar {
364            for j in 0..p_scalar {
365                yhat += fit.coefficients[1 + ncomp + j] * sc[(i, j)];
366            }
367        }
368        predictions[i] = yhat;
369    }
370    predictions
371}