Skip to main content

scirs2_stats/functional/
regression.rs

1//! Functional regression models.
2//!
3//! Provides scalar-on-function and function-on-function regression:
4//!
5//! - **Scalar-on-function**: Predict a scalar response from functional predictors.
6//!   Model: y_i = alpha + integral x_i(t) * beta(t) dt + epsilon_i
7//!
8//! - **Function-on-function**: Predict a functional response from functional predictors.
9//!   Model: y_i(t) = integral x_i(s) * beta(s,t) ds + epsilon_i(t)
10//!
11//! Both use penalized basis expansion for the coefficient function(s).
12
13use scirs2_core::ndarray::{Array1, Array2};
14
15use super::basis::{compute_basis_coefficients, evaluate_basis};
16use super::types::{BasisType, FoFResult, FunctionalConfig, FunctionalData, SoFResult};
17use crate::error::{StatsError, StatsResult};
18
19/// Compute R-squared (coefficient of determination).
20///
21/// R^2 = 1 - SS_res / SS_tot
22///
23/// Returns a value in (-inf, 1]. A value of 1.0 indicates perfect fit.
24pub fn r_squared(y: &[f64], y_hat: &[f64]) -> f64 {
25    let n = y.len();
26    if n == 0 {
27        return 0.0;
28    }
29    let y_mean: f64 = y.iter().sum::<f64>() / n as f64;
30    let ss_tot: f64 = y.iter().map(|&yi| (yi - y_mean).powi(2)).sum();
31    let ss_res: f64 = y
32        .iter()
33        .zip(y_hat.iter())
34        .map(|(&yi, &yh)| (yi - yh).powi(2))
35        .sum();
36
37    if ss_tot < 1e-14 {
38        return if ss_res < 1e-14 { 1.0 } else { 0.0 };
39    }
40    1.0 - ss_res / ss_tot
41}
42
43/// Scalar-on-function regression.
44///
45/// Model: y_i = alpha + integral x_i(t) * beta(t) dt + epsilon_i
46///
47/// The coefficient function beta(t) is expanded in a basis:
48///   beta(t) = sum_k b_k * phi_k(t)
49///
50/// The integral becomes: integral x_i(t) * beta(t) dt approx= sum_j x_i(t_j) * beta(t_j) * dt_j
51///
52/// After discretization: y = alpha + X_int * b + epsilon
53/// where `X_int[i,k] = sum_j x_i(t_j) * phi_k(t_j) * dt_j`
54///
55/// Penalized least squares: b = (X_int' X_int + lambda * P)^{-1} X_int' (y - alpha)
56pub struct ScalarOnFunctionRegression;
57
58impl ScalarOnFunctionRegression {
59    /// Fit a scalar-on-function regression model.
60    ///
61    /// # Arguments
62    /// * `data` - Functional predictor data (n curves on a grid)
63    /// * `y` - Scalar responses (length n)
64    /// * `config` - Configuration (basis type, smoothing parameter)
65    ///
66    /// # Returns
67    /// A `SoFResult` containing the estimated coefficient function, intercept, etc.
68    ///
69    /// # Errors
70    /// Returns an error if dimensions mismatch or the linear system cannot be solved.
71    pub fn fit(
72        data: &FunctionalData,
73        y: &[f64],
74        config: &FunctionalConfig,
75    ) -> StatsResult<SoFResult> {
76        let n_curves = data.n_curves();
77        let n_grid = data.n_grid();
78
79        if y.len() != n_curves {
80            return Err(StatsError::DimensionMismatch(format!(
81                "y has length {}, but data has {} curves",
82                y.len(),
83                n_curves
84            )));
85        }
86
87        // Evaluate basis
88        let phi = evaluate_basis(&data.grid, &config.basis)?;
89        let n_basis = phi.ncols();
90
91        // Compute grid spacing for numerical integration (trapezoidal)
92        let dt = compute_grid_spacing(&data.grid);
93
94        // Build the design matrix X_int: X_int[i,k] = integral x_i(t) * phi_k(t) dt
95        // Using raw predictor curves (the basis expansion of beta provides smoothing)
96        let mut x_int = Array2::<f64>::zeros((n_curves, n_basis));
97        for i in 0..n_curves {
98            for k in 0..n_basis {
99                let mut integral = 0.0;
100                for j in 0..n_grid {
101                    integral += data.observations[i][j] * phi[[j, k]] * dt[j];
102                }
103                x_int[[i, k]] = integral;
104            }
105        }
106
107        // Compute intercept: y_mean
108        let y_arr = Array1::from_vec(y.to_vec());
109        let y_mean = y_arr.mean().unwrap_or(0.0);
110
111        // Center y
112        let y_centered: Array1<f64> = y_arr
113            .iter()
114            .map(|&yi| yi - y_mean)
115            .collect::<Vec<_>>()
116            .into();
117
118        // Determine lambda for the coefficient function penalty
119        let lambda_beta = self_select_lambda_sof(&x_int, &y_centered, n_basis);
120
121        // Solve: (X_int' X_int + lambda * P) b = X_int' y_centered
122        let xtx = x_int.t().dot(&x_int);
123        let xty = x_int.t().dot(&y_centered);
124        let penalty = second_derivative_penalty(n_basis);
125        let mut lhs = xtx + &(&penalty * lambda_beta);
126
127        let b = solve_symmetric_positive(&mut lhs, &xty)?;
128
129        // Evaluate beta(t) = phi * b
130        let beta_func = phi.dot(&b);
131
132        // Compute fitted values
133        let fitted_centered = x_int.dot(&b);
134        let fitted: Array1<f64> = fitted_centered
135            .iter()
136            .map(|&fc| fc + y_mean)
137            .collect::<Vec<_>>()
138            .into();
139
140        let r2 = r_squared(y, fitted.as_slice().unwrap_or(&[]));
141
142        Ok(SoFResult {
143            beta: beta_func,
144            intercept: y_mean,
145            beta_coefficients: b,
146            basis: config.basis.clone(),
147            grid: data.grid.clone(),
148            lambda: lambda_beta,
149            fitted_values: fitted,
150            r_squared: r2,
151        })
152    }
153
154    /// Predict scalar responses for new functional data.
155    ///
156    /// # Arguments
157    /// * `result` - Fitted model result
158    /// * `new_data` - New functional predictor data
159    ///
160    /// # Returns
161    /// Predicted scalar responses.
162    ///
163    /// # Errors
164    /// Returns an error if grids don't match or basis evaluation fails.
165    pub fn predict(result: &SoFResult, new_data: &FunctionalData) -> StatsResult<Vec<f64>> {
166        if new_data.grid.len() != result.grid.len() {
167            return Err(StatsError::DimensionMismatch(format!(
168                "New data grid has {} points, model was fitted on {} points",
169                new_data.grid.len(),
170                result.grid.len()
171            )));
172        }
173
174        let phi = evaluate_basis(&new_data.grid, &result.basis)?;
175        let dt = compute_grid_spacing(&new_data.grid);
176        let n_grid = new_data.n_grid();
177        let n_basis = phi.ncols();
178
179        let mut predictions = Vec::with_capacity(new_data.n_curves());
180        for obs in &new_data.observations {
181            // Compute integral: sum_j x(t_j) * phi_k(t_j) * dt_j
182            let mut x_int_row = Array1::<f64>::zeros(n_basis);
183            for k in 0..n_basis {
184                let mut integral = 0.0;
185                for j in 0..n_grid {
186                    integral += obs[j] * phi[[j, k]] * dt[j];
187                }
188                x_int_row[k] = integral;
189            }
190
191            let pred_centered: f64 = x_int_row
192                .iter()
193                .zip(result.beta_coefficients.iter())
194                .map(|(&x, &b)| x * b)
195                .sum();
196            predictions.push(pred_centered + result.intercept);
197        }
198
199        Ok(predictions)
200    }
201}
202
203/// Function-on-function regression.
204///
205/// Model: y_i(t) = integral x_i(s) * beta(s,t) ds + epsilon_i(t)
206///
207/// The bivariate coefficient beta(s,t) is expanded in a tensor product basis:
208///   beta(s,t) = sum_{j,k} B_{jk} * phi_j(s) * psi_k(t)
209///
210/// This is solved via penalized least squares on the vectorized coefficients.
211pub struct FunctionOnFunctionRegression;
212
213impl FunctionOnFunctionRegression {
214    /// Fit a function-on-function regression model.
215    ///
216    /// # Arguments
217    /// * `predictor_data` - Functional predictor data (n curves)
218    /// * `response_data` - Functional response data (n curves, possibly different grid)
219    /// * `predictor_config` - Configuration for predictor basis
220    /// * `response_config` - Configuration for response basis
221    ///
222    /// # Returns
223    /// A `FoFResult` containing the bivariate coefficient surface, fitted curves, etc.
224    ///
225    /// # Errors
226    /// Returns an error if dimensions mismatch or the linear system fails.
227    pub fn fit(
228        predictor_data: &FunctionalData,
229        response_data: &FunctionalData,
230        predictor_config: &FunctionalConfig,
231        response_config: &FunctionalConfig,
232    ) -> StatsResult<FoFResult> {
233        let n_curves = predictor_data.n_curves();
234        if n_curves != response_data.n_curves() {
235            return Err(StatsError::DimensionMismatch(format!(
236                "Predictor has {} curves, response has {} curves",
237                n_curves,
238                response_data.n_curves()
239            )));
240        }
241
242        let n_grid_s = predictor_data.n_grid();
243        let n_grid_t = response_data.n_grid();
244
245        // Evaluate predictor and response bases
246        let phi_s = evaluate_basis(&predictor_data.grid, &predictor_config.basis)?;
247        let phi_t = evaluate_basis(&response_data.grid, &response_config.basis)?;
248        let n_basis_s = phi_s.ncols();
249        let n_basis_t = phi_t.ncols();
250        let n_total = n_basis_s * n_basis_t;
251
252        // Grid spacings
253        let ds = compute_grid_spacing(&predictor_data.grid);
254        let dt = compute_grid_spacing(&response_data.grid);
255
256        // Smoothing parameters
257        let lambda_s = predictor_config.smoothing_param.unwrap_or(1e-4);
258        let lambda_t = response_config.smoothing_param.unwrap_or(1e-4);
259
260        // Smooth predictor curves and compute integral design matrices
261        // For each curve i and predictor basis j:
262        //   X_s[i,j] = integral x_i(s) * phi_j(s) ds
263        let mut x_s = Array2::<f64>::zeros((n_curves, n_basis_s));
264        for i in 0..n_curves {
265            let coeffs =
266                compute_basis_coefficients(&predictor_data.observations[i], &phi_s, lambda_s)?;
267            let smoothed = phi_s.dot(&coeffs);
268            for j in 0..n_basis_s {
269                let mut integral = 0.0;
270                for l in 0..n_grid_s {
271                    integral += smoothed[l] * phi_s[[l, j]] * ds[l];
272                }
273                x_s[[i, j]] = integral;
274            }
275        }
276
277        // Smooth response curves and get coefficients
278        let mut y_coeffs = Array2::<f64>::zeros((n_curves, n_basis_t));
279        for i in 0..n_curves {
280            let coeffs =
281                compute_basis_coefficients(&response_data.observations[i], &phi_t, lambda_t)?;
282            for k in 0..n_basis_t {
283                y_coeffs[[i, k]] = coeffs[k];
284            }
285        }
286
287        // Build the regression: for each response basis k:
288        //   y_coeffs[:, k] = X_s * B[:, k]
289        // This is n_basis_t separate regressions, or one big system.
290        //
291        // We solve column-by-column with a penalty on B.
292        let penalty_s = second_derivative_penalty(n_basis_s);
293        let lambda_pen = (lambda_s + lambda_t) / 2.0; // Combined penalty
294
295        let xtx = x_s.t().dot(&x_s);
296        let mut lhs = &xtx + &(&penalty_s * lambda_pen);
297
298        let mut b_matrix = Array2::<f64>::zeros((n_basis_s, n_basis_t));
299        for k in 0..n_basis_t {
300            let rhs = x_s.t().dot(&y_coeffs.column(k));
301            let b_col = solve_symmetric_positive(&mut lhs, &rhs)?;
302            for j in 0..n_basis_s {
303                b_matrix[[j, k]] = b_col[j];
304            }
305        }
306
307        // Evaluate beta(s,t) on the grids
308        // beta(s,t) = sum_{j,k} B_{jk} * phi_j(s) * psi_k(t)
309        let mut beta_surface = Array2::<f64>::zeros((n_grid_s, n_grid_t));
310        for si in 0..n_grid_s {
311            for ti in 0..n_grid_t {
312                let mut val = 0.0;
313                for j in 0..n_basis_s {
314                    for k in 0..n_basis_t {
315                        val += b_matrix[[j, k]] * phi_s[[si, j]] * phi_t[[ti, k]];
316                    }
317                }
318                beta_surface[[si, ti]] = val;
319            }
320        }
321
322        // Compute fitted response curves
323        // y_hat_i(t) = sum_k (sum_j X_s[i,j] * B[j,k]) * psi_k(t)
324        let y_hat_coeffs = x_s.dot(&b_matrix); // (n_curves, n_basis_t)
325        let fitted_curves = y_hat_coeffs.dot(&phi_t.t()); // (n_curves, n_grid_t)
326
327        // Vectorize B for storage
328        let mut beta_vec = Array1::<f64>::zeros(n_total);
329        for j in 0..n_basis_s {
330            for k in 0..n_basis_t {
331                beta_vec[j * n_basis_t + k] = b_matrix[[j, k]];
332            }
333        }
334
335        Ok(FoFResult {
336            beta_surface,
337            beta_coefficients: beta_vec,
338            predictor_basis: predictor_config.basis.clone(),
339            response_basis: response_config.basis.clone(),
340            predictor_grid: predictor_data.grid.clone(),
341            response_grid: response_data.grid.clone(),
342            lambda: lambda_pen,
343            fitted_curves,
344        })
345    }
346
347    /// Predict response curves for new predictor data.
348    ///
349    /// # Arguments
350    /// * `result` - Fitted model result
351    /// * `new_data` - New functional predictor data
352    ///
353    /// # Returns
354    /// Predicted response curves, one per input curve.
355    ///
356    /// # Errors
357    /// Returns an error if grids don't match or computation fails.
358    pub fn predict(result: &FoFResult, new_data: &FunctionalData) -> StatsResult<Vec<Vec<f64>>> {
359        let n_grid_s = result.predictor_grid.len();
360        if new_data.n_grid() != n_grid_s {
361            return Err(StatsError::DimensionMismatch(format!(
362                "New data grid has {} points, predictor was fitted on {} points",
363                new_data.n_grid(),
364                n_grid_s
365            )));
366        }
367
368        let phi_s = evaluate_basis(&new_data.grid, &result.predictor_basis)?;
369        let phi_t = evaluate_basis(&result.response_grid, &result.response_basis)?;
370        let n_basis_s = phi_s.ncols();
371        let n_basis_t = phi_t.ncols();
372        let ds = compute_grid_spacing(&new_data.grid);
373
374        // Reconstruct B matrix
375        let mut b_matrix = Array2::<f64>::zeros((n_basis_s, n_basis_t));
376        for j in 0..n_basis_s {
377            for k in 0..n_basis_t {
378                b_matrix[[j, k]] = result.beta_coefficients[j * n_basis_t + k];
379            }
380        }
381
382        let lambda_smooth = result.lambda.min(1e-4);
383        let mut predictions = Vec::with_capacity(new_data.n_curves());
384
385        for obs in &new_data.observations {
386            let coeffs = compute_basis_coefficients(obs, &phi_s, lambda_smooth)?;
387            let smoothed = phi_s.dot(&coeffs);
388
389            // Compute X_s row
390            let mut x_row = Array1::<f64>::zeros(n_basis_s);
391            for j in 0..n_basis_s {
392                let mut integral = 0.0;
393                for l in 0..n_grid_s {
394                    integral += smoothed[l] * phi_s[[l, j]] * ds[l];
395                }
396                x_row[j] = integral;
397            }
398
399            // y_hat(t) = sum_k (sum_j x_row[j] * B[j,k]) * psi_k(t)
400            let coeff_t = x_row.dot(&b_matrix); // (n_basis_t,)
401            let pred_curve = phi_t.dot(&coeff_t); // (n_grid_t,)
402            predictions.push(pred_curve.to_vec());
403        }
404
405        Ok(predictions)
406    }
407}
408
409/// Simple GCV-like lambda selection for scalar-on-function regression.
410fn self_select_lambda_sof(x_int: &Array2<f64>, y: &Array1<f64>, n_basis: usize) -> f64 {
411    let n = y.len() as f64;
412    let xtx = x_int.t().dot(x_int);
413    let xty = x_int.t().dot(y);
414    let penalty = second_derivative_penalty(n_basis);
415
416    let mut best_lambda = 1e-2;
417    let mut best_gcv = f64::INFINITY;
418
419    let log_lambdas: Vec<f64> = (-6..=4).map(|k| 10.0f64.powi(k)).collect();
420
421    for &lam in &log_lambdas {
422        let mut lhs = &xtx + &(&penalty * lam);
423        let b = match solve_symmetric_positive(&mut lhs, &xty) {
424            Ok(b) => b,
425            Err(_) => continue,
426        };
427
428        let y_hat = x_int.dot(&b);
429        let residuals = y - &y_hat;
430        let rss: f64 = residuals.iter().map(|r| r * r).sum();
431
432        // Approximate effective df
433        let trace = compute_trace_approx(&xtx, &penalty, lam, n_basis);
434        let denom = 1.0 - trace / n;
435        if denom.abs() < 1e-10 {
436            continue;
437        }
438        let gcv = (rss / n) / (denom * denom);
439
440        if gcv < best_gcv {
441            best_gcv = gcv;
442            best_lambda = lam;
443        }
444    }
445
446    best_lambda
447}
448
449/// Approximate trace of (X'X + lambda*P)^{-1} X'X.
450fn compute_trace_approx(xtx: &Array2<f64>, penalty: &Array2<f64>, lambda: f64, n: usize) -> f64 {
451    let mut lhs = xtx + &(penalty * lambda);
452    let mut trace = 0.0;
453    for j in 0..n {
454        let rhs = xtx.column(j).to_owned();
455        if let Ok(x) = solve_symmetric_positive(&mut lhs, &rhs) {
456            trace += x[j];
457        }
458    }
459    trace
460}
461
462/// Compute trapezoidal-rule grid spacing weights.
463fn compute_grid_spacing(grid: &[f64]) -> Vec<f64> {
464    let n = grid.len();
465    if n <= 1 {
466        return vec![1.0; n];
467    }
468    let mut dt = vec![0.0; n];
469    dt[0] = (grid[1] - grid[0]) / 2.0;
470    for i in 1..n - 1 {
471        dt[i] = (grid[i + 1] - grid[i - 1]) / 2.0;
472    }
473    dt[n - 1] = (grid[n - 1] - grid[n - 2]) / 2.0;
474    dt
475}
476
477/// Construct a second-derivative penalty matrix.
478fn second_derivative_penalty(n_basis: usize) -> Array2<f64> {
479    if n_basis < 3 {
480        return Array2::<f64>::zeros((n_basis, n_basis));
481    }
482    let m = n_basis - 2;
483    let mut d2 = Array2::<f64>::zeros((m, n_basis));
484    for i in 0..m {
485        d2[[i, i]] = 1.0;
486        d2[[i, i + 1]] = -2.0;
487        d2[[i, i + 2]] = 1.0;
488    }
489    d2.t().dot(&d2)
490}
491
492/// Solve symmetric positive-definite system via Cholesky.
493fn solve_symmetric_positive(a: &mut Array2<f64>, b: &Array1<f64>) -> StatsResult<Array1<f64>> {
494    let n = a.nrows();
495    if n != a.ncols() || n != b.len() {
496        return Err(StatsError::DimensionMismatch(format!(
497            "Matrix is {}x{}, vector length {}",
498            a.nrows(),
499            a.ncols(),
500            b.len()
501        )));
502    }
503
504    // Regularize
505    for i in 0..n {
506        a[[i, i]] += 1e-10;
507    }
508
509    let mut l = Array2::<f64>::zeros((n, n));
510    for j in 0..n {
511        let mut sum = 0.0;
512        for k in 0..j {
513            sum += l[[j, k]] * l[[j, k]];
514        }
515        let diag = a[[j, j]] - sum;
516        if diag <= 0.0 {
517            return Err(StatsError::ComputationError(
518                "Matrix is not positive definite".to_string(),
519            ));
520        }
521        l[[j, j]] = diag.sqrt();
522
523        for i in (j + 1)..n {
524            let mut sum2 = 0.0;
525            for k in 0..j {
526                sum2 += l[[i, k]] * l[[j, k]];
527            }
528            l[[i, j]] = (a[[i, j]] - sum2) / l[[j, j]];
529        }
530    }
531
532    let mut z = Array1::<f64>::zeros(n);
533    for i in 0..n {
534        let mut s = b[i];
535        for k in 0..i {
536            s -= l[[i, k]] * z[k];
537        }
538        z[i] = s / l[[i, i]];
539    }
540
541    let mut x = Array1::<f64>::zeros(n);
542    for i in (0..n).rev() {
543        let mut s = z[i];
544        for k in (i + 1)..n {
545            s -= l[[k, i]] * x[k];
546        }
547        x[i] = s / l[[i, i]];
548    }
549
550    Ok(x)
551}
552
553#[cfg(test)]
554mod tests {
555    use super::*;
556    use crate::functional::types::FunctionalData;
557
558    #[test]
559    fn test_r_squared_perfect_fit() {
560        let y = vec![1.0, 2.0, 3.0, 4.0, 5.0];
561        let y_hat = vec![1.0, 2.0, 3.0, 4.0, 5.0];
562        let r2 = r_squared(&y, &y_hat);
563        assert!((r2 - 1.0).abs() < 1e-10, "R^2 should be 1.0, got {}", r2);
564    }
565
566    #[test]
567    fn test_r_squared_mean_model() {
568        let y = vec![1.0, 2.0, 3.0, 4.0, 5.0];
569        let y_hat = vec![3.0, 3.0, 3.0, 3.0, 3.0]; // mean prediction
570        let r2 = r_squared(&y, &y_hat);
571        assert!(r2.abs() < 1e-10, "R^2 should be 0.0, got {}", r2);
572    }
573
574    #[test]
575    fn test_scalar_on_function_known_beta() {
576        // True model: y_i = integral x_i(t) * beta(t) dt
577        // where beta(t) = sin(pi * t).
578        // Predictor curves must span a rich enough subspace to identify beta.
579        // Use Fourier-like curves: x_i(t) = sum_k c_{ik} * sin(k*pi*t)
580        let n_grid = 80;
581        let grid: Vec<f64> = (0..n_grid)
582            .map(|i| i as f64 / (n_grid - 1) as f64)
583            .collect();
584        let n_curves = 60;
585        let n_harmonics = 8;
586
587        let dt: Vec<f64> = {
588            let mut d = vec![0.0; n_grid];
589            d[0] = (grid[1] - grid[0]) / 2.0;
590            for i in 1..n_grid - 1 {
591                d[i] = (grid[i + 1] - grid[i - 1]) / 2.0;
592            }
593            d[n_grid - 1] = (grid[n_grid - 1] - grid[n_grid - 2]) / 2.0;
594            d
595        };
596
597        let beta_true: Vec<f64> = grid
598            .iter()
599            .map(|&t| (std::f64::consts::PI * t).sin())
600            .collect();
601
602        let mut observations = Vec::with_capacity(n_curves);
603        let mut y = Vec::with_capacity(n_curves);
604
605        for i in 0..n_curves {
606            // Rich predictor curve using multiple harmonics
607            let curve: Vec<f64> = grid
608                .iter()
609                .map(|&t| {
610                    let mut val = 0.0;
611                    for k in 1..=n_harmonics {
612                        let c = ((i * k) as f64 * 0.37 + k as f64 * 0.13).sin();
613                        val += c * (k as f64 * std::f64::consts::PI * t).sin();
614                    }
615                    val
616                })
617                .collect();
618
619            // True response: integral x(t) * beta(t) dt
620            let yi: f64 = curve
621                .iter()
622                .zip(beta_true.iter())
623                .zip(dt.iter())
624                .map(|((&x, &b), &d)| x * b * d)
625                .sum();
626            y.push(yi);
627            observations.push(curve);
628        }
629
630        let data =
631            FunctionalData::new(grid.clone(), observations).expect("Data creation should succeed");
632        let config = FunctionalConfig {
633            basis: BasisType::BSpline {
634                n_basis: 20,
635                degree: 3,
636            },
637            smoothing_param: Some(1e-6),
638            n_components: 3,
639        };
640
641        let result =
642            ScalarOnFunctionRegression::fit(&data, &y, &config).expect("Fit should succeed");
643
644        // R-squared should be close to 1.0 (clean data)
645        assert!(
646            result.r_squared > 0.95,
647            "R^2 should be > 0.95, got {}",
648            result.r_squared
649        );
650
651        // Beta should resemble sin(pi*t)
652        // Check correlation between estimated and true beta
653        let beta_est = result.beta.as_slice().unwrap_or(&[]);
654        let mean_est = beta_est.iter().sum::<f64>() / beta_est.len() as f64;
655        let mean_true = beta_true.iter().sum::<f64>() / beta_true.len() as f64;
656        let cov: f64 = beta_est
657            .iter()
658            .zip(beta_true.iter())
659            .map(|(&e, &t)| (e - mean_est) * (t - mean_true))
660            .sum();
661        let var_est: f64 = beta_est.iter().map(|&e| (e - mean_est).powi(2)).sum();
662        let var_true: f64 = beta_true.iter().map(|&t| (t - mean_true).powi(2)).sum();
663        let denom = var_est.sqrt() * var_true.sqrt();
664        let corr = if denom > 1e-14 { cov / denom } else { 0.0 };
665        assert!(
666            corr > 0.9,
667            "Correlation between true and estimated beta should be > 0.9, got {}",
668            corr
669        );
670    }
671
672    #[test]
673    fn test_scalar_on_function_prediction() {
674        let n_grid = 60;
675        let grid: Vec<f64> = (0..n_grid)
676            .map(|i| i as f64 / (n_grid - 1) as f64)
677            .collect();
678        let n_train = 30;
679        let n_test = 10;
680
681        let dt: Vec<f64> = {
682            let mut d = vec![0.0; n_grid];
683            d[0] = (grid[1] - grid[0]) / 2.0;
684            for i in 1..n_grid - 1 {
685                d[i] = (grid[i + 1] - grid[i - 1]) / 2.0;
686            }
687            d[n_grid - 1] = (grid[n_grid - 1] - grid[n_grid - 2]) / 2.0;
688            d
689        };
690
691        let beta_true: Vec<f64> = grid.iter().map(|&t| t * (1.0 - t)).collect();
692
693        // Generate training data
694        let mut train_obs = Vec::with_capacity(n_train);
695        let mut y_train = Vec::with_capacity(n_train);
696        for i in 0..n_train {
697            let a = (i as f64 * 0.5).sin();
698            let curve: Vec<f64> = grid.iter().map(|&t| a * (2.0 * t - 1.0)).collect();
699            let yi: f64 = curve
700                .iter()
701                .zip(beta_true.iter())
702                .zip(dt.iter())
703                .map(|((&x, &b), &d)| x * b * d)
704                .sum();
705            y_train.push(yi);
706            train_obs.push(curve);
707        }
708
709        // Generate test data
710        let mut test_obs = Vec::with_capacity(n_test);
711        let mut y_test = Vec::with_capacity(n_test);
712        for i in 0..n_test {
713            let a = ((n_train + i) as f64 * 0.5).sin();
714            let curve: Vec<f64> = grid.iter().map(|&t| a * (2.0 * t - 1.0)).collect();
715            let yi: f64 = curve
716                .iter()
717                .zip(beta_true.iter())
718                .zip(dt.iter())
719                .map(|((&x, &b), &d)| x * b * d)
720                .sum();
721            y_test.push(yi);
722            test_obs.push(curve);
723        }
724
725        let train_data = FunctionalData::new(grid.clone(), train_obs)
726            .expect("Train data creation should succeed");
727        let test_data =
728            FunctionalData::new(grid.clone(), test_obs).expect("Test data creation should succeed");
729
730        let config = FunctionalConfig {
731            basis: BasisType::BSpline {
732                n_basis: 15,
733                degree: 3,
734            },
735            smoothing_param: Some(1e-5),
736            n_components: 3,
737        };
738
739        let result = ScalarOnFunctionRegression::fit(&train_data, &y_train, &config)
740            .expect("Fit should succeed");
741
742        let predictions = ScalarOnFunctionRegression::predict(&result, &test_data)
743            .expect("Predict should succeed");
744
745        assert_eq!(predictions.len(), n_test);
746
747        let r2_test = r_squared(&y_test, &predictions);
748        assert!(r2_test > 0.9, "Test R^2 should be > 0.9, got {}", r2_test);
749    }
750
751    #[test]
752    fn test_function_on_function_basic() {
753        let n_grid = 40;
754        let grid: Vec<f64> = (0..n_grid)
755            .map(|i| i as f64 / (n_grid - 1) as f64)
756            .collect();
757        let n_curves = 25;
758
759        // Simple model: y_i(t) = c_i * t where c_i depends on x_i
760        let mut pred_obs = Vec::with_capacity(n_curves);
761        let mut resp_obs = Vec::with_capacity(n_curves);
762
763        for i in 0..n_curves {
764            let a = (i as f64 * 0.4).sin();
765            let pred_curve: Vec<f64> = grid.iter().map(|&s| a * s).collect();
766            // Response is proportional to the predictor's "energy"
767            let energy: f64 = pred_curve.iter().map(|x| x * x).sum::<f64>() / n_grid as f64;
768            let resp_curve: Vec<f64> = grid.iter().map(|&t| energy * t).collect();
769            pred_obs.push(pred_curve);
770            resp_obs.push(resp_curve);
771        }
772
773        let pred_data = FunctionalData::new(grid.clone(), pred_obs)
774            .expect("Predictor data creation should succeed");
775        let resp_data = FunctionalData::new(grid.clone(), resp_obs)
776            .expect("Response data creation should succeed");
777
778        let config = FunctionalConfig {
779            basis: BasisType::BSpline {
780                n_basis: 10,
781                degree: 3,
782            },
783            smoothing_param: Some(1e-4),
784            n_components: 3,
785        };
786
787        let result = FunctionOnFunctionRegression::fit(&pred_data, &resp_data, &config, &config)
788            .expect("FoF fit should succeed");
789
790        // Check that beta_surface has correct dimensions
791        assert_eq!(result.beta_surface.nrows(), n_grid);
792        assert_eq!(result.beta_surface.ncols(), n_grid);
793
794        // Fitted curves should have correct shape
795        assert_eq!(result.fitted_curves.nrows(), n_curves);
796        assert_eq!(result.fitted_curves.ncols(), n_grid);
797    }
798
799    #[test]
800    fn test_function_on_function_prediction() {
801        let n_grid = 30;
802        let grid: Vec<f64> = (0..n_grid)
803            .map(|i| i as f64 / (n_grid - 1) as f64)
804            .collect();
805        let n_train = 20;
806        let n_test = 5;
807
808        let mut train_pred = Vec::with_capacity(n_train);
809        let mut train_resp = Vec::with_capacity(n_train);
810        for i in 0..n_train {
811            let a = (i as f64 * 0.3).sin();
812            let pred: Vec<f64> = grid.iter().map(|&s| a * s).collect();
813            let resp: Vec<f64> = grid.iter().map(|&t| a * t * 0.5).collect();
814            train_pred.push(pred);
815            train_resp.push(resp);
816        }
817
818        let mut test_pred = Vec::with_capacity(n_test);
819        for i in 0..n_test {
820            let a = ((n_train + i) as f64 * 0.3).sin();
821            let pred: Vec<f64> = grid.iter().map(|&s| a * s).collect();
822            test_pred.push(pred);
823        }
824
825        let train_pred_data = FunctionalData::new(grid.clone(), train_pred)
826            .expect("Train pred creation should succeed");
827        let train_resp_data = FunctionalData::new(grid.clone(), train_resp)
828            .expect("Train resp creation should succeed");
829        let test_pred_data = FunctionalData::new(grid.clone(), test_pred)
830            .expect("Test pred creation should succeed");
831
832        let config = FunctionalConfig {
833            basis: BasisType::BSpline {
834                n_basis: 8,
835                degree: 3,
836            },
837            smoothing_param: Some(1e-3),
838            n_components: 3,
839        };
840
841        let result =
842            FunctionOnFunctionRegression::fit(&train_pred_data, &train_resp_data, &config, &config)
843                .expect("FoF fit should succeed");
844
845        let predictions = FunctionOnFunctionRegression::predict(&result, &test_pred_data)
846            .expect("FoF predict should succeed");
847
848        assert_eq!(predictions.len(), n_test);
849        assert_eq!(predictions[0].len(), n_grid);
850    }
851
852    #[test]
853    fn test_r_squared_close_to_one_clean_data() {
854        // With clean, noiseless data, R^2 should be very close to 1
855        let n_grid = 50;
856        let grid: Vec<f64> = (0..n_grid)
857            .map(|i| i as f64 / (n_grid - 1) as f64)
858            .collect();
859        let n_curves = 30;
860
861        let dt: Vec<f64> = {
862            let mut d = vec![0.0; n_grid];
863            d[0] = (grid[1] - grid[0]) / 2.0;
864            for i in 1..n_grid - 1 {
865                d[i] = (grid[i + 1] - grid[i - 1]) / 2.0;
866            }
867            d[n_grid - 1] = (grid[n_grid - 1] - grid[n_grid - 2]) / 2.0;
868            d
869        };
870
871        // Simple linear beta
872        let beta_true: Vec<f64> = grid.iter().map(|&t| t).collect();
873
874        let mut observations = Vec::with_capacity(n_curves);
875        let mut y = Vec::with_capacity(n_curves);
876        for i in 0..n_curves {
877            let c = i as f64 / n_curves as f64 * 4.0 - 2.0;
878            let curve: Vec<f64> = grid.iter().map(|&t| c * (1.0 + t)).collect();
879            let yi: f64 = curve
880                .iter()
881                .zip(beta_true.iter())
882                .zip(dt.iter())
883                .map(|((&x, &b), &d)| x * b * d)
884                .sum();
885            y.push(yi);
886            observations.push(curve);
887        }
888
889        let data = FunctionalData::new(grid, observations).expect("Data creation should succeed");
890        let config = FunctionalConfig {
891            basis: BasisType::BSpline {
892                n_basis: 12,
893                degree: 3,
894            },
895            smoothing_param: Some(1e-6),
896            n_components: 3,
897        };
898
899        let result =
900            ScalarOnFunctionRegression::fit(&data, &y, &config).expect("Fit should succeed");
901
902        assert!(
903            result.r_squared > 0.99,
904            "R^2 should be > 0.99 on clean data, got {}",
905            result.r_squared
906        );
907    }
908}