1use super::{
2 build_design_matrix, cholesky_factor, compute_beta_se, compute_fitted, compute_ols_std_errors,
3 compute_r_squared, compute_xtx, ols_solve, recover_beta_t, resolve_ncomp,
4 validate_fregre_inputs, FregreCvResult, FregreLmResult, ModelSelectionResult,
5 SelectionCriterion,
6};
7use crate::cv::create_folds;
8use crate::error::FdarError;
9use crate::matrix::FdMatrix;
10use crate::regression::fdata_to_pc_1d;
11
12#[must_use = "expensive computation whose result should not be discarded"]
63pub fn fregre_lm(
64 data: &FdMatrix,
65 y: &[f64],
66 scalar_covariates: Option<&FdMatrix>,
67 ncomp: usize,
68) -> Result<FregreLmResult, FdarError> {
69 let (n, m) = data.shape();
70 validate_fregre_inputs(n, m, y, scalar_covariates)?;
71
72 let ncomp = resolve_ncomp(ncomp, data, y, scalar_covariates, n, m)?;
73
74 let argvals: Vec<f64> = (0..m).map(|j| j as f64 / (m - 1).max(1) as f64).collect();
75 let fpca = fdata_to_pc_1d(data, ncomp, &argvals)?;
76 let design = build_design_matrix(&fpca.scores, ncomp, scalar_covariates, n);
77 let p_total = design.ncols();
78 let (coeffs, hat_diag) = ols_solve(&design, y)?;
79
80 let fitted_values = compute_fitted(&design, &coeffs);
81 let residuals: Vec<f64> = y
82 .iter()
83 .zip(&fitted_values)
84 .map(|(&yi, &yh)| yi - yh)
85 .collect();
86 let (r_squared, r_squared_adj) = compute_r_squared(y, &residuals, p_total);
87
88 let ss_res: f64 = residuals.iter().map(|r| r * r).sum();
89 let df_resid = (n as f64 - p_total as f64).max(1.0);
90 let residual_se = (ss_res / df_resid).sqrt();
91 let sigma2 = ss_res / df_resid;
92
93 let xtx = compute_xtx(&design);
94 let l = cholesky_factor(&xtx, p_total).unwrap_or_else(|_| vec![1.0; p_total * p_total]);
95 let std_errors = compute_ols_std_errors(&l, p_total, sigma2);
96
97 let gcv = hat_diag
98 .iter()
99 .zip(&residuals)
100 .map(|(&h, &r)| (r / (1.0 - h).max(1e-10)).powi(2))
101 .sum::<f64>()
102 / n as f64;
103
104 let beta_t = recover_beta_t(&coeffs[1..=ncomp], &fpca.rotation, m);
105 let beta_se = compute_beta_se(&std_errors[1..=ncomp], &fpca.rotation, m);
106 let gamma: Vec<f64> = coeffs[1 + ncomp..].to_vec();
107
108 let nf = n as f64;
109 let rss = ss_res;
110 let aic = nf * (rss / nf).ln() + 2.0 * p_total as f64;
111 let bic = nf * (rss / nf).ln() + (nf).ln() * p_total as f64;
112
113 Ok(FregreLmResult {
114 intercept: coeffs[0],
115 beta_t,
116 beta_se,
117 gamma,
118 fitted_values,
119 residuals,
120 r_squared,
121 r_squared_adj,
122 std_errors,
123 ncomp,
124 fpca,
125 coefficients: coeffs,
126 residual_se,
127 gcv,
128 aic,
129 bic,
130 })
131}
132
133fn copy_rows(dst: &mut FdMatrix, src: &FdMatrix, src_rows: &[usize]) {
139 let ncols = src.ncols();
140 for (dst_i, &src_i) in src_rows.iter().enumerate() {
141 for j in 0..ncols {
142 dst[(dst_i, j)] = src[(src_i, j)];
143 }
144 }
145}
146
147fn cv_error_for_k(
149 data: &FdMatrix,
150 y: &[f64],
151 scalar_covariates: Option<&FdMatrix>,
152 k: usize,
153 n_folds: usize,
154 folds: &[usize],
155) -> Result<f64, FdarError> {
156 let n = data.nrows();
157 let ncols = data.ncols();
158 let mut total_error = 0.0;
159 let mut count = 0;
160
161 for fold in 0..n_folds {
162 let train_idx: Vec<usize> = (0..n).filter(|&i| folds[i] != fold).collect();
163 let test_idx: Vec<usize> = (0..n).filter(|&i| folds[i] == fold).collect();
164 let n_test = test_idx.len();
165 if n_test == 0 || train_idx.len() < k + 2 {
166 continue;
167 }
168
169 let mut train_data = FdMatrix::zeros(train_idx.len(), ncols);
170 let mut test_data = FdMatrix::zeros(n_test, ncols);
171 copy_rows(&mut train_data, data, &train_idx);
172 copy_rows(&mut test_data, data, &test_idx);
173
174 let train_y: Vec<f64> = train_idx.iter().map(|&i| y[i]).collect();
175 let test_y: Vec<f64> = test_idx.iter().map(|&i| y[i]).collect();
176
177 let train_sc = scalar_covariates.map(|sc| {
178 let mut m = FdMatrix::zeros(train_idx.len(), sc.ncols());
179 copy_rows(&mut m, sc, &train_idx);
180 m
181 });
182 let test_sc = scalar_covariates.map(|sc| {
183 let mut m = FdMatrix::zeros(n_test, sc.ncols());
184 copy_rows(&mut m, sc, &test_idx);
185 m
186 });
187
188 let Ok(fit) = fregre_lm(&train_data, &train_y, train_sc.as_ref(), k) else {
189 continue;
190 };
191
192 let predictions = predict_fregre_lm(&fit, &test_data, test_sc.as_ref());
193 let fold_mse: f64 = predictions
194 .iter()
195 .zip(&test_y)
196 .map(|(&yhat, &yi)| (yhat - yi).powi(2))
197 .sum::<f64>()
198 / n_test as f64;
199
200 total_error += fold_mse * n_test as f64;
201 count += n_test;
202 }
203
204 if count > 0 {
205 Ok(total_error / count as f64)
206 } else {
207 Err(FdarError::ComputationFailed {
208 operation: "CV error computation",
209 detail: "no valid folds produced predictions; try reducing ncomp or increasing the number of observations".to_string(),
210 })
211 }
212}
213
214#[must_use = "expensive computation whose result should not be discarded"]
231pub fn fregre_cv(
232 data: &FdMatrix,
233 y: &[f64],
234 scalar_covariates: Option<&FdMatrix>,
235 k_min: usize,
236 k_max: usize,
237 n_folds: usize,
238) -> Result<FregreCvResult, FdarError> {
239 let n = data.nrows();
240 if n < n_folds {
241 return Err(FdarError::InvalidDimension {
242 parameter: "data",
243 expected: format!("at least {n_folds} rows (n_folds)"),
244 actual: format!("{n}"),
245 });
246 }
247 if k_min < 1 || k_min > k_max {
248 return Err(FdarError::InvalidParameter {
249 parameter: "k_min/k_max",
250 message: format!("k_min={k_min} must be >= 1 and <= k_max={k_max}"),
251 });
252 }
253
254 let k_max = k_max.min(n - 2).min(data.ncols());
255
256 let folds = create_folds(n, n_folds, 42);
258
259 let mut k_values = Vec::new();
260 let mut cv_errors = Vec::new();
261
262 for k in k_min..=k_max {
263 if let Ok(err) = cv_error_for_k(data, y, scalar_covariates, k, n_folds, &folds) {
264 k_values.push(k);
265 cv_errors.push(err);
266 }
267 }
268
269 if k_values.is_empty() {
270 return Err(FdarError::ComputationFailed {
271 operation: "fregre_cv",
272 detail: "no valid K values produced CV errors; all candidate ncomp values failed — check data for zero-variance columns or increase n".to_string(),
273 });
274 }
275
276 let (optimal_idx, &min_cv_error) = cv_errors
277 .iter()
278 .enumerate()
279 .min_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
280 .expect("checked: cv_errors is non-empty");
281
282 let optimal_k = k_values[optimal_idx];
283
284 let ncols = data.ncols();
286 let mut oof_predictions = vec![f64::NAN; n];
287 let mut fold_errors = Vec::with_capacity(n_folds);
288
289 for fold in 0..n_folds {
290 let train_idx: Vec<usize> = (0..n).filter(|&i| folds[i] != fold).collect();
291 let test_idx: Vec<usize> = (0..n).filter(|&i| folds[i] == fold).collect();
292 let n_test = test_idx.len();
293 if n_test == 0 || train_idx.len() < optimal_k + 2 {
294 fold_errors.push(f64::NAN);
295 continue;
296 }
297
298 let mut train_data = FdMatrix::zeros(train_idx.len(), ncols);
299 let mut test_data = FdMatrix::zeros(n_test, ncols);
300 copy_rows(&mut train_data, data, &train_idx);
301 copy_rows(&mut test_data, data, &test_idx);
302
303 let train_y: Vec<f64> = train_idx.iter().map(|&i| y[i]).collect();
304
305 let train_sc = scalar_covariates.map(|sc| {
306 let mut m = FdMatrix::zeros(train_idx.len(), sc.ncols());
307 copy_rows(&mut m, sc, &train_idx);
308 m
309 });
310 let test_sc = scalar_covariates.map(|sc| {
311 let mut m = FdMatrix::zeros(n_test, sc.ncols());
312 copy_rows(&mut m, sc, &test_idx);
313 m
314 });
315
316 if let Ok(fit) = fregre_lm(&train_data, &train_y, train_sc.as_ref(), optimal_k) {
317 let preds = predict_fregre_lm(&fit, &test_data, test_sc.as_ref());
318 let mut fold_mse = 0.0;
319 for (ti, &i) in test_idx.iter().enumerate() {
320 oof_predictions[i] = preds[ti];
321 fold_mse += (preds[ti] - y[i]).powi(2);
322 }
323 fold_errors.push(fold_mse / n_test as f64);
324 } else {
325 fold_errors.push(f64::NAN);
326 }
327 }
328
329 Ok(FregreCvResult {
330 k_values: k_values.clone(),
331 cv_errors,
332 optimal_k,
333 min_cv_error,
334 oof_predictions,
335 fold_assignments: folds,
336 fold_errors,
337 })
338}
339
340#[must_use = "expensive computation whose result should not be discarded"]
362pub fn model_selection_ncomp(
363 data: &FdMatrix,
364 y: &[f64],
365 scalar_covariates: Option<&FdMatrix>,
366 max_ncomp: usize,
367 criterion: SelectionCriterion,
368) -> Result<ModelSelectionResult, FdarError> {
369 if max_ncomp == 0 {
370 return Err(FdarError::InvalidParameter {
371 parameter: "max_ncomp",
372 message: "must be >= 1".to_string(),
373 });
374 }
375
376 let mut criteria = Vec::with_capacity(max_ncomp);
377
378 for k in 1..=max_ncomp {
379 if let Ok(fit) = fregre_lm(data, y, scalar_covariates, k) {
380 criteria.push((k, fit.aic, fit.bic, fit.gcv));
381 }
382 }
383
384 if criteria.is_empty() {
385 return Err(FdarError::ComputationFailed {
386 operation: "model_selection_ncomp",
387 detail: "no valid models could be fitted; all candidate ncomp values failed — check data for degeneracy or reduce the range".to_string(),
388 });
389 }
390
391 let best_idx = match criterion {
392 SelectionCriterion::Aic => criteria
393 .iter()
394 .enumerate()
395 .min_by(|(_, a), (_, b)| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal))
396 .map_or(0, |(i, _)| i),
397 SelectionCriterion::Bic => criteria
398 .iter()
399 .enumerate()
400 .min_by(|(_, a), (_, b)| a.2.partial_cmp(&b.2).unwrap_or(std::cmp::Ordering::Equal))
401 .map_or(0, |(i, _)| i),
402 SelectionCriterion::Gcv => criteria
403 .iter()
404 .enumerate()
405 .min_by(|(_, a), (_, b)| a.3.partial_cmp(&b.3).unwrap_or(std::cmp::Ordering::Equal))
406 .map_or(0, |(i, _)| i),
407 };
408
409 Ok(ModelSelectionResult {
410 best_ncomp: criteria[best_idx].0,
411 criteria,
412 })
413}
414
415pub fn predict_fregre_lm(
445 fit: &FregreLmResult,
446 new_data: &FdMatrix,
447 new_scalar: Option<&FdMatrix>,
448) -> Vec<f64> {
449 let (n_new, m) = new_data.shape();
450 let ncomp = fit.ncomp;
451 let p_scalar = fit.gamma.len();
452
453 let mut predictions = vec![0.0; n_new];
454 for i in 0..n_new {
455 let mut yhat = fit.intercept;
456 for k in 0..ncomp {
457 let mut s = 0.0;
458 for j in 0..m {
459 s += (new_data[(i, j)] - fit.fpca.mean[j])
460 * fit.fpca.rotation[(j, k)]
461 * fit.fpca.weights[j];
462 }
463 yhat += fit.coefficients[1 + k] * s;
464 }
465 if let Some(sc) = new_scalar {
466 for j in 0..p_scalar {
467 yhat += fit.gamma[j] * sc[(i, j)];
468 }
469 }
470 predictions[i] = yhat;
471 }
472 predictions
473}