fdars_core/scalar_on_function/
fregre_lm.rs1use super::*;
2
3#[must_use = "expensive computation whose result should not be discarded"]
31pub fn fregre_lm(
32 data: &FdMatrix,
33 y: &[f64],
34 scalar_covariates: Option<&FdMatrix>,
35 ncomp: usize,
36) -> Result<FregreLmResult, FdarError> {
37 let (n, m) = data.shape();
38 validate_fregre_inputs(n, m, y, scalar_covariates)?;
39
40 let ncomp = resolve_ncomp(ncomp, data, y, scalar_covariates, n, m)?;
41
42 let fpca = fdata_to_pc_1d(data, ncomp)?;
43 let design = build_design_matrix(&fpca.scores, ncomp, scalar_covariates, n);
44 let p_total = design.ncols();
45 let (coeffs, hat_diag) = ols_solve(&design, y)?;
46
47 let fitted_values = compute_fitted(&design, &coeffs);
48 let residuals: Vec<f64> = y
49 .iter()
50 .zip(&fitted_values)
51 .map(|(&yi, &yh)| yi - yh)
52 .collect();
53 let (r_squared, r_squared_adj) = compute_r_squared(y, &residuals, p_total);
54
55 let ss_res: f64 = residuals.iter().map(|r| r * r).sum();
56 let df_resid = (n as f64 - p_total as f64).max(1.0);
57 let residual_se = (ss_res / df_resid).sqrt();
58 let sigma2 = ss_res / df_resid;
59
60 let xtx = compute_xtx(&design);
61 let l = cholesky_factor(&xtx, p_total).unwrap_or_else(|_| vec![1.0; p_total * p_total]);
62 let std_errors = compute_ols_std_errors(&l, p_total, sigma2);
63
64 let gcv = hat_diag
65 .iter()
66 .zip(&residuals)
67 .map(|(&h, &r)| (r / (1.0 - h).max(1e-10)).powi(2))
68 .sum::<f64>()
69 / n as f64;
70
71 let beta_t = recover_beta_t(&coeffs[1..1 + ncomp], &fpca.rotation, m);
72 let beta_se = compute_beta_se(&std_errors[1..1 + ncomp], &fpca.rotation, m);
73 let gamma: Vec<f64> = coeffs[1 + ncomp..].to_vec();
74
75 let nf = n as f64;
76 let rss = ss_res;
77 let aic = nf * (rss / nf).ln() + 2.0 * p_total as f64;
78 let bic = nf * (rss / nf).ln() + (nf).ln() * p_total as f64;
79
80 Ok(FregreLmResult {
81 intercept: coeffs[0],
82 beta_t,
83 beta_se,
84 gamma,
85 fitted_values,
86 residuals,
87 r_squared,
88 r_squared_adj,
89 std_errors,
90 ncomp,
91 fpca,
92 coefficients: coeffs,
93 residual_se,
94 gcv,
95 aic,
96 bic,
97 })
98}
99
100fn copy_rows(dst: &mut FdMatrix, src: &FdMatrix, src_rows: &[usize]) {
106 let ncols = src.ncols();
107 for (dst_i, &src_i) in src_rows.iter().enumerate() {
108 for j in 0..ncols {
109 dst[(dst_i, j)] = src[(src_i, j)];
110 }
111 }
112}
113
114fn cv_error_for_k(
116 data: &FdMatrix,
117 y: &[f64],
118 scalar_covariates: Option<&FdMatrix>,
119 k: usize,
120 n_folds: usize,
121 folds: &[usize],
122) -> Result<f64, FdarError> {
123 let n = data.nrows();
124 let ncols = data.ncols();
125 let mut total_error = 0.0;
126 let mut count = 0;
127
128 for fold in 0..n_folds {
129 let train_idx: Vec<usize> = (0..n).filter(|&i| folds[i] != fold).collect();
130 let test_idx: Vec<usize> = (0..n).filter(|&i| folds[i] == fold).collect();
131 let n_test = test_idx.len();
132 if n_test == 0 || train_idx.len() < k + 2 {
133 continue;
134 }
135
136 let mut train_data = FdMatrix::zeros(train_idx.len(), ncols);
137 let mut test_data = FdMatrix::zeros(n_test, ncols);
138 copy_rows(&mut train_data, data, &train_idx);
139 copy_rows(&mut test_data, data, &test_idx);
140
141 let train_y: Vec<f64> = train_idx.iter().map(|&i| y[i]).collect();
142 let test_y: Vec<f64> = test_idx.iter().map(|&i| y[i]).collect();
143
144 let train_sc = scalar_covariates.map(|sc| {
145 let mut m = FdMatrix::zeros(train_idx.len(), sc.ncols());
146 copy_rows(&mut m, sc, &train_idx);
147 m
148 });
149 let test_sc = scalar_covariates.map(|sc| {
150 let mut m = FdMatrix::zeros(n_test, sc.ncols());
151 copy_rows(&mut m, sc, &test_idx);
152 m
153 });
154
155 let fit = match fregre_lm(&train_data, &train_y, train_sc.as_ref(), k) {
156 Ok(f) => f,
157 Err(_) => continue,
158 };
159
160 let predictions = predict_fregre_lm(&fit, &test_data, test_sc.as_ref());
161 let fold_mse: f64 = predictions
162 .iter()
163 .zip(&test_y)
164 .map(|(&yhat, &yi)| (yhat - yi).powi(2))
165 .sum::<f64>()
166 / n_test as f64;
167
168 total_error += fold_mse * n_test as f64;
169 count += n_test;
170 }
171
172 if count > 0 {
173 Ok(total_error / count as f64)
174 } else {
175 Err(FdarError::ComputationFailed {
176 operation: "CV error computation",
177 detail: "no valid folds produced predictions".to_string(),
178 })
179 }
180}
181
182#[must_use = "expensive computation whose result should not be discarded"]
199pub fn fregre_cv(
200 data: &FdMatrix,
201 y: &[f64],
202 scalar_covariates: Option<&FdMatrix>,
203 k_min: usize,
204 k_max: usize,
205 n_folds: usize,
206) -> Result<FregreCvResult, FdarError> {
207 let n = data.nrows();
208 if n < n_folds {
209 return Err(FdarError::InvalidDimension {
210 parameter: "data",
211 expected: format!("at least {n_folds} rows (n_folds)"),
212 actual: format!("{n}"),
213 });
214 }
215 if k_min < 1 || k_min > k_max {
216 return Err(FdarError::InvalidParameter {
217 parameter: "k_min/k_max",
218 message: format!("k_min={k_min} must be >= 1 and <= k_max={k_max}"),
219 });
220 }
221
222 let k_max = k_max.min(n - 2).min(data.ncols());
223
224 let folds = create_folds(n, n_folds, 42);
226
227 let mut k_values = Vec::new();
228 let mut cv_errors = Vec::new();
229
230 for k in k_min..=k_max {
231 if let Ok(err) = cv_error_for_k(data, y, scalar_covariates, k, n_folds, &folds) {
232 k_values.push(k);
233 cv_errors.push(err);
234 }
235 }
236
237 if k_values.is_empty() {
238 return Err(FdarError::ComputationFailed {
239 operation: "fregre_cv",
240 detail: "no valid K values produced CV errors".to_string(),
241 });
242 }
243
244 let (optimal_idx, &min_cv_error) = cv_errors
245 .iter()
246 .enumerate()
247 .min_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
248 .expect("checked: cv_errors is non-empty");
249
250 Ok(FregreCvResult {
251 k_values: k_values.clone(),
252 cv_errors,
253 optimal_k: k_values[optimal_idx],
254 min_cv_error,
255 })
256}
257
258#[must_use = "expensive computation whose result should not be discarded"]
280pub fn model_selection_ncomp(
281 data: &FdMatrix,
282 y: &[f64],
283 scalar_covariates: Option<&FdMatrix>,
284 max_ncomp: usize,
285 criterion: SelectionCriterion,
286) -> Result<ModelSelectionResult, FdarError> {
287 if max_ncomp == 0 {
288 return Err(FdarError::InvalidParameter {
289 parameter: "max_ncomp",
290 message: "must be >= 1".to_string(),
291 });
292 }
293
294 let mut criteria = Vec::with_capacity(max_ncomp);
295
296 for k in 1..=max_ncomp {
297 if let Ok(fit) = fregre_lm(data, y, scalar_covariates, k) {
298 criteria.push((k, fit.aic, fit.bic, fit.gcv));
299 }
300 }
301
302 if criteria.is_empty() {
303 return Err(FdarError::ComputationFailed {
304 operation: "model_selection_ncomp",
305 detail: "no valid models could be fitted".to_string(),
306 });
307 }
308
309 let best_idx = match criterion {
310 SelectionCriterion::Aic => criteria
311 .iter()
312 .enumerate()
313 .min_by(|(_, a), (_, b)| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal))
314 .map_or(0, |(i, _)| i),
315 SelectionCriterion::Bic => criteria
316 .iter()
317 .enumerate()
318 .min_by(|(_, a), (_, b)| a.2.partial_cmp(&b.2).unwrap_or(std::cmp::Ordering::Equal))
319 .map_or(0, |(i, _)| i),
320 SelectionCriterion::Gcv => criteria
321 .iter()
322 .enumerate()
323 .min_by(|(_, a), (_, b)| a.3.partial_cmp(&b.3).unwrap_or(std::cmp::Ordering::Equal))
324 .map_or(0, |(i, _)| i),
325 };
326
327 Ok(ModelSelectionResult {
328 best_ncomp: criteria[best_idx].0,
329 criteria,
330 })
331}
332
333pub fn predict_fregre_lm(
340 fit: &FregreLmResult,
341 new_data: &FdMatrix,
342 new_scalar: Option<&FdMatrix>,
343) -> Vec<f64> {
344 let (n_new, m) = new_data.shape();
345 let ncomp = fit.ncomp;
346 let p_scalar = fit.gamma.len();
347
348 let mut predictions = vec![0.0; n_new];
349 for i in 0..n_new {
350 let mut yhat = fit.intercept;
351 for k in 0..ncomp {
352 let mut s = 0.0;
353 for j in 0..m {
354 s += (new_data[(i, j)] - fit.fpca.mean[j]) * fit.fpca.rotation[(j, k)];
355 }
356 yhat += fit.coefficients[1 + k] * s;
357 }
358 if let Some(sc) = new_scalar {
359 for j in 0..p_scalar {
360 yhat += fit.gamma[j] * sc[(i, j)];
361 }
362 }
363 predictions[i] = yhat;
364 }
365 predictions
366}