use crate::error::{Result, TimeSeriesError};
use crate::functional::basis::{evaluate_basis_matrix, BasisSystem, BSplineBasis};
use scirs2_core::ndarray::{s, Array1, Array2, ArrayView1, Axis};
use scirs2_linalg::{lstsq, solve};
/// A function represented as a linear combination of basis functions:
/// `f(t) = sum_k coefficients[k] * basis_k(t)`.
#[derive(Debug, Clone)]
pub struct FunctionalData<B: BasisSystem + Clone> {
    /// Basis system the coefficients refer to.
    pub basis: B,
    /// One coefficient per basis function (length == `basis.n_basis()`).
    pub coefficients: Array1<f64>,
    /// Optional human-readable identifier for this curve.
    pub label: Option<String>,
}
impl<B: BasisSystem + Clone> FunctionalData<B> {
    /// Builds a functional datum from a basis and its coefficient vector.
    ///
    /// # Errors
    /// Returns `DimensionMismatch` when the coefficient length differs from
    /// the number of basis functions.
    pub fn new(basis: B, coefficients: Array1<f64>) -> Result<Self> {
        let n_basis = basis.n_basis();
        if coefficients.len() != n_basis {
            return Err(TimeSeriesError::DimensionMismatch {
                expected: n_basis,
                actual: coefficients.len(),
            });
        }
        Ok(Self {
            basis,
            coefficients,
            label: None,
        })
    }

    /// Evaluates the function at a single point: `phi(t) . c`.
    pub fn eval(&self, t: f64) -> Result<f64> {
        Ok(self.basis.evaluate(t)?.dot(&self.coefficients))
    }

    /// Evaluates the function at each point, preserving order.
    pub fn eval_vec(&self, points: &Array1<f64>) -> Result<Array1<f64>> {
        let vals: Vec<f64> = points
            .iter()
            .map(|&t| self.eval(t))
            .collect::<Result<_>>()?;
        Ok(Array1::from(vals))
    }

    /// Evaluates the derivative of the given order at a single point.
    pub fn eval_deriv(&self, t: f64, order: usize) -> Result<f64> {
        Ok(self.basis.evaluate_deriv(t, order)?.dot(&self.coefficients))
    }

    /// Evaluates the derivative of the given order at each point.
    pub fn eval_deriv_vec(&self, points: &Array1<f64>, order: usize) -> Result<Array1<f64>> {
        let vals: Vec<f64> = points
            .iter()
            .map(|&t| self.eval_deriv(t, order))
            .collect::<Result<_>>()?;
        Ok(Array1::from(vals))
    }

    /// L2 inner product `<f, g> = c_f' G c_g`, where `G` is the basis Gram matrix.
    pub fn inner_product(&self, other: &FunctionalData<B>) -> Result<f64> {
        let gram = self.basis.gram_matrix()?;
        Ok(self.coefficients.dot(&gram.dot(&other.coefficients)))
    }

    /// L2 norm `sqrt(<f, f>)`; clamps tiny negative round-off before the sqrt.
    pub fn l2_norm(&self) -> Result<f64> {
        Ok(self.inner_product(self)?.max(0.0).sqrt())
    }
}
/// Result of a generalized cross-validation (GCV) sweep over a lambda grid.
#[derive(Debug, Clone)]
pub struct GCV {
    /// Candidate smoothing parameters, in grid order.
    pub lambda_grid: Vec<f64>,
    /// GCV criterion value for each candidate.
    pub gcv_values: Vec<f64>,
    /// Grid lambda attaining the smallest GCV value.
    pub optimal_lambda: f64,
    /// GCV value attained at `optimal_lambda`.
    pub optimal_gcv: f64,
}
impl GCV {
    /// Evaluates `gcv_fn` on a log-spaced grid of `n_lambda` values in
    /// `[lambda_min, lambda_max]` and records the minimizer.
    ///
    /// # Errors
    /// - `InvalidInput` when `n_lambda < 2` or either bound is not a positive
    ///   finite number (the grid is built in log space, so a non-positive or
    ///   non-finite bound would silently produce NaN lambdas).
    /// - Any error propagated from `gcv_fn`.
    pub fn select<F>(
        lambda_min: f64,
        lambda_max: f64,
        n_lambda: usize,
        gcv_fn: F,
    ) -> Result<Self>
    where
        F: Fn(f64) -> Result<f64>,
    {
        if n_lambda < 2 {
            return Err(TimeSeriesError::InvalidInput(
                "n_lambda must be >= 2".to_string(),
            ));
        }
        // ln() of a non-positive bound is NaN and would corrupt the whole
        // grid; NaN criterion values would then defeat the min_by below.
        if !(lambda_min > 0.0 && lambda_min.is_finite())
            || !(lambda_max > 0.0 && lambda_max.is_finite())
        {
            return Err(TimeSeriesError::InvalidInput(
                "lambda bounds must be positive finite numbers".to_string(),
            ));
        }
        let log_min = lambda_min.ln();
        let log_max = lambda_max.ln();
        let step = (log_max - log_min) / (n_lambda - 1) as f64;
        let mut lambda_grid = Vec::with_capacity(n_lambda);
        let mut gcv_values = Vec::with_capacity(n_lambda);
        for i in 0..n_lambda {
            let lam = (log_min + i as f64 * step).exp();
            let gcv_val = gcv_fn(lam)?;
            lambda_grid.push(lam);
            gcv_values.push(gcv_val);
        }
        // Pick the grid point with the smallest criterion value.
        let (opt_idx, &optimal_gcv) = gcv_values
            .iter()
            .enumerate()
            .min_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
            .ok_or_else(|| TimeSeriesError::ComputationError("GCV: empty grid".to_string()))?;
        let optimal_lambda = lambda_grid[opt_idx];
        Ok(Self {
            lambda_grid,
            gcv_values,
            optimal_lambda,
            optimal_gcv,
        })
    }
}
/// Configuration for penalized least-squares smoothing.
///
/// When `lambda` is `None`, the penalty weight is chosen automatically by
/// minimizing the GCV criterion over a log-spaced grid of `n_lambda_grid`
/// values in `[lambda_min, lambda_max]`.
#[derive(Debug, Clone)]
pub struct PenalizedLeastSquares {
    /// Fixed penalty weight; `None` triggers GCV selection.
    pub lambda: Option<f64>,
    /// Derivative order used for the roughness penalty.
    pub penalty_order: usize,
    /// Number of grid points for GCV lambda selection.
    pub n_lambda_grid: usize,
    /// Smallest candidate lambda.
    pub lambda_min: f64,
    /// Largest candidate lambda.
    pub lambda_max: f64,
}

impl Default for PenalizedLeastSquares {
    /// Second-order roughness penalty with GCV over 50 log-spaced lambdas.
    fn default() -> Self {
        PenalizedLeastSquares {
            lambda_max: 1e4,
            lambda_min: 1e-10,
            n_lambda_grid: 50,
            penalty_order: 2,
            lambda: None,
        }
    }
}
impl PenalizedLeastSquares {
    /// Fits basis coefficients to `(t, y)` by penalized least squares,
    /// minimizing `||y - Phi c||^2 + lambda * c' P c`.
    ///
    /// When `self.lambda` is `None`, lambda is selected by GCV.
    ///
    /// # Errors
    /// - `DimensionMismatch` when `t` and `y` differ in length.
    /// - `InsufficientData` for fewer than 2 observations.
    /// - Errors propagated from basis evaluation or the linear solver.
    pub fn fit<B: BasisSystem + Clone>(
        &self,
        basis: B,
        t: &Array1<f64>,
        y: &Array1<f64>,
    ) -> Result<FunctionalData<B>> {
        let n = t.len();
        if n != y.len() {
            return Err(TimeSeriesError::DimensionMismatch {
                expected: n,
                actual: y.len(),
            });
        }
        if n < 2 {
            return Err(TimeSeriesError::InsufficientData {
                message: "PenalizedLeastSquares requires at least 2 observations".to_string(),
                required: 2,
                actual: n,
            });
        }
        let phi = evaluate_basis_matrix(&basis, t)?;
        let penalty = basis.penalty_matrix(self.penalty_order)?;
        let lambda = match self.lambda {
            Some(lam) => lam,
            None => {
                GCV::select(self.lambda_min, self.lambda_max, self.n_lambda_grid, |lam| {
                    Self::gcv_criterion(&phi, y, &penalty, lam, n)
                })?
                .optimal_lambda
            }
        };
        let coefficients = Self::solve_penalized(&phi, y, &penalty, lambda)?;
        FunctionalData::new(basis, coefficients)
    }

    /// GCV(lambda) = (RSS / n) / (1 - tr(H)/n)^2 for the smoother matrix
    /// `H = Phi (Phi'Phi + lambda P)^{-1} Phi'`.
    ///
    /// The trace is computed via the cyclic identity
    /// `tr(H) = tr((Phi'Phi + lambda P)^{-1} Phi'Phi)`, which only needs a
    /// k x k solve instead of the k x n solve the full hat matrix requires.
    fn gcv_criterion(
        phi: &Array2<f64>,
        y: &Array1<f64>,
        penalty: &Array2<f64>,
        lambda: f64,
        n: usize,
    ) -> Result<f64> {
        let k = phi.ncols();
        let phi_t_phi = phi.t().dot(phi);
        // Penalized normal-equation matrix A = Phi'Phi + lambda * P.
        let a = &phi_t_phi + &(penalty * lambda);
        let phi_t_y = phi.t().dot(y);
        let c = solve_linear_system(&a, &phi_t_y)?;
        let y_hat = phi.dot(&c);
        let rss: f64 = y
            .iter()
            .zip(y_hat.iter())
            .map(|(yi, yhi)| (yi - yhi).powi(2))
            .sum();
        // tr(H) = tr(A^{-1} Phi'Phi): solve the k x k system once and sum
        // the diagonal of the solution.
        let a_inv_ptp = solve_matrix_system(&a, &phi_t_phi)?;
        let trace_h: f64 = (0..k).map(|i| a_inv_ptp[[i, i]]).sum();
        let n_f = n as f64;
        let denom = 1.0 - trace_h / n_f;
        // A vanishing denominator means an (effectively) saturated smoother;
        // report it as the worst possible criterion value.
        if denom.abs() < 1e-10 {
            return Ok(f64::INFINITY);
        }
        Ok((rss / n_f) / (denom * denom))
    }

    /// Solves the penalized normal equations
    /// `(Phi'Phi + lambda P) c = Phi' y` for the coefficient vector.
    fn solve_penalized(
        phi: &Array2<f64>,
        y: &Array1<f64>,
        penalty: &Array2<f64>,
        lambda: f64,
    ) -> Result<Array1<f64>> {
        let a = phi.t().dot(phi) + penalty * lambda;
        let phi_t_y = phi.t().dot(y);
        solve_linear_system(&a, &phi_t_y)
    }
}
/// Smoothing kernel families for local regression weights.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum KernelType {
    Epanechnikov,
    Gaussian,
    Uniform,
    Biweight,
    Tricubic,
}

impl KernelType {
    /// Evaluates the kernel at the scaled distance `u`.
    ///
    /// Every kernel except `Gaussian` has compact support on `[-1, 1]` and
    /// returns 0 outside it; the Gaussian is the standard normal density.
    fn eval(self, u: f64) -> f64 {
        // The Gaussian has unbounded support, so handle it before the
        // common compact-support cutoff below.
        if let KernelType::Gaussian = self {
            return (-0.5 * u * u).exp() / (2.0 * std::f64::consts::PI).sqrt();
        }
        let abs_u = u.abs();
        if abs_u > 1.0 {
            return 0.0;
        }
        match self {
            KernelType::Epanechnikov => 0.75 * (1.0 - u * u),
            KernelType::Uniform => 0.5,
            KernelType::Biweight => {
                let v = 1.0 - u * u;
                (15.0 / 16.0) * v * v
            }
            KernelType::Tricubic => {
                let v = 1.0 - abs_u.powi(3);
                (70.0 / 81.0) * v * v * v
            }
            KernelType::Gaussian => unreachable!("handled before the cutoff"),
        }
    }
}
/// Degree of the local polynomial fitted by [`KernelSmoother`].
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum LocalPolynomialOrder {
    /// Degree 0: locally weighted mean.
    NadarayaWatson,
    /// Degree 1: local linear regression.
    LocalLinear,
    /// Degree 2: local quadratic regression.
    LocalQuadratic,
}
/// Nonparametric kernel smoother with optional data-driven bandwidth.
#[derive(Debug, Clone)]
pub struct KernelSmoother {
    /// Fixed bandwidth; `None` selects one by leave-one-out CV.
    pub bandwidth: Option<f64>,
    /// Weighting kernel.
    pub kernel: KernelType,
    /// Degree of the local polynomial fit.
    pub poly_order: LocalPolynomialOrder,
    /// Number of candidate bandwidths tried during CV selection.
    pub n_bw_candidates: usize,
}
impl Default for KernelSmoother {
    /// Local-linear Epanechnikov smoother with CV bandwidth over 40 candidates.
    fn default() -> Self {
        Self {
            bandwidth: None,
            kernel: KernelType::Epanechnikov,
            poly_order: LocalPolynomialOrder::LocalLinear,
            n_bw_candidates: 40,
        }
    }
}
impl KernelSmoother {
    /// Smooths `y` at the observation points `t`.
    ///
    /// Uses `self.bandwidth` when set, otherwise selects one by leave-one-out
    /// cross-validation.
    ///
    /// # Errors
    /// - `DimensionMismatch` when `t` and `y` differ in length.
    /// - `InvalidInput` for a non-positive/non-finite bandwidth or degenerate
    ///   data during bandwidth selection.
    pub fn fit(
        &self,
        t: &Array1<f64>,
        y: &Array1<f64>,
    ) -> Result<Array1<f64>> {
        let n = t.len();
        if n != y.len() {
            return Err(TimeSeriesError::DimensionMismatch {
                expected: n,
                actual: y.len(),
            });
        }
        let h = match self.bandwidth {
            Some(bw) => bw,
            None => self.select_bandwidth_cv(t, y)?,
        };
        self.predict_at(t, y, t, h)
    }

    /// Predicts smoothed values at `t_new` from observations `(t_obs, y_obs)`.
    ///
    /// # Errors
    /// `InvalidInput` when `bandwidth` is not a positive finite number (such a
    /// bandwidth would otherwise silently yield NaN kernel weights).
    pub fn predict_at(
        &self,
        t_obs: &Array1<f64>,
        y_obs: &Array1<f64>,
        t_new: &Array1<f64>,
        bandwidth: f64,
    ) -> Result<Array1<f64>> {
        if !(bandwidth > 0.0 && bandwidth.is_finite()) {
            return Err(TimeSeriesError::InvalidInput(
                "bandwidth must be a positive finite number".to_string(),
            ));
        }
        let m = t_new.len();
        let mut result = Array1::zeros(m);
        for (i, &t0) in t_new.iter().enumerate() {
            result[i] = self.local_fit(t_obs, y_obs, t0, bandwidth)?;
        }
        Ok(result)
    }

    /// Local polynomial fit at a single point `t0` with bandwidth `h`.
    ///
    /// Falls back to the nearest observation's response when all kernel
    /// weights vanish (e.g. `t0` far outside the data for a compact-support
    /// kernel).
    fn local_fit(
        &self,
        t: &Array1<f64>,
        y: &Array1<f64>,
        t0: f64,
        h: f64,
    ) -> Result<f64> {
        let n = t.len();
        // Guard: the nearest-neighbor fallback below indexes y[0] and would
        // panic on empty input.
        if n == 0 {
            return Err(TimeSeriesError::InsufficientData {
                message: "local_fit requires at least one observation".to_string(),
                required: 1,
                actual: 0,
            });
        }
        let weights: Vec<f64> = t.iter().map(|&ti| self.kernel.eval((ti - t0) / h)).collect();
        let sum_w: f64 = weights.iter().sum();
        if sum_w < 1e-15 {
            // All weights vanished: return the response of the closest point.
            let nearest = t
                .iter()
                .enumerate()
                .min_by(|(_, &a), (_, &b)| {
                    (a - t0)
                        .abs()
                        .partial_cmp(&(b - t0).abs())
                        .unwrap_or(std::cmp::Ordering::Equal)
                })
                .map(|(i, _)| i)
                .unwrap_or(0);
            return Ok(y[nearest]);
        }
        match self.poly_order {
            LocalPolynomialOrder::NadarayaWatson => {
                // Kernel-weighted average of the responses.
                let num: f64 = weights.iter().zip(y.iter()).map(|(&w, &yi)| w * yi).sum();
                Ok(num / sum_w)
            }
            LocalPolynomialOrder::LocalLinear => {
                // Weighted least squares on [1, (t_i - t0)]; the intercept is
                // the fitted value at t0.
                let mut xtwx = Array2::zeros((2, 2));
                let mut xtwy = Array1::zeros(2);
                for i in 0..n {
                    let wi = weights[i];
                    let xi = t[i] - t0;
                    xtwx[[0, 0]] += wi;
                    xtwx[[0, 1]] += wi * xi;
                    xtwx[[1, 0]] += wi * xi;
                    xtwx[[1, 1]] += wi * xi * xi;
                    xtwy[0] += wi * y[i];
                    xtwy[1] += wi * xi * y[i];
                }
                let beta = solve_linear_system(&xtwx, &xtwy)?;
                Ok(beta[0])
            }
            LocalPolynomialOrder::LocalQuadratic => {
                // Weighted least squares on [1, x, x^2] centered at t0.
                let mut xtwx = Array2::zeros((3, 3));
                let mut xtwy = Array1::zeros(3);
                for i in 0..n {
                    let wi = weights[i];
                    let xi = t[i] - t0;
                    let xi2 = xi * xi;
                    let row = [1.0, xi, xi2];
                    for p in 0..3 {
                        for q in 0..3 {
                            xtwx[[p, q]] += wi * row[p] * row[q];
                        }
                        xtwy[p] += wi * row[p] * y[i];
                    }
                }
                let beta = solve_linear_system(&xtwx, &xtwy)?;
                Ok(beta[0])
            }
        }
    }

    /// Selects a bandwidth by leave-one-out CV over a log-spaced grid of
    /// `n_bw_candidates` values in `[0.05 * range, range]`.
    fn select_bandwidth_cv(&self, t: &Array1<f64>, y: &Array1<f64>) -> Result<f64> {
        // Guard: `n_bw_candidates - 1` below would underflow usize for 0.
        if self.n_bw_candidates == 0 {
            return Err(TimeSeriesError::InvalidInput(
                "n_bw_candidates must be >= 1".to_string(),
            ));
        }
        let n = t.len();
        // Leave-one-out needs at least one remaining observation per fold.
        if n < 2 {
            return Err(TimeSeriesError::InsufficientData {
                message: "bandwidth selection requires at least 2 observations".to_string(),
                required: 2,
                actual: n,
            });
        }
        let t_min = t.iter().cloned().fold(f64::INFINITY, f64::min);
        let t_max = t.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
        let range = t_max - t_min;
        if range < 1e-15 {
            return Err(TimeSeriesError::InvalidInput(
                "All time points are identical".to_string(),
            ));
        }
        let h_min = 0.05 * range;
        let h_max = range;
        let log_min = h_min.ln();
        let log_max = h_max.ln();
        // saturating_sub keeps the single-candidate case well-defined.
        let step = (log_max - log_min) / self.n_bw_candidates.saturating_sub(1).max(1) as f64;
        let mut best_h = h_min;
        let mut best_cv = f64::INFINITY;
        for k in 0..self.n_bw_candidates {
            let h = (log_min + k as f64 * step).exp();
            let mut cv_error = 0.0;
            for i in 0..n {
                // Leave observation i out and predict it from the rest.
                let t_loo: Array1<f64> = t
                    .iter()
                    .enumerate()
                    .filter(|&(j, _)| j != i)
                    .map(|(_, &v)| v)
                    .collect();
                let y_loo: Array1<f64> = y
                    .iter()
                    .enumerate()
                    .filter(|&(j, _)| j != i)
                    .map(|(_, &v)| v)
                    .collect();
                let y_pred = self.local_fit(&t_loo, &y_loo, t[i], h)?;
                cv_error += (y[i] - y_pred).powi(2);
            }
            cv_error /= n as f64;
            if cv_error < best_cv {
                best_cv = cv_error;
                best_h = h;
            }
        }
        Ok(best_h)
    }
}
/// Penalized B-spline smoother configuration.
///
/// Fits a cubic B-spline basis with a second-order roughness penalty; when
/// `lambda` is `None` the penalty weight is chosen by GCV.
#[derive(Debug, Clone)]
pub struct SplineSmoother {
    /// Fixed smoothing parameter; `None` triggers GCV selection.
    pub lambda: Option<f64>,
    /// Number of interior knots for the B-spline basis.
    pub n_interior_knots: usize,
    /// Number of grid points for GCV lambda selection.
    pub n_lambda_grid: usize,
    /// Smallest candidate lambda.
    pub lambda_min: f64,
    /// Largest candidate lambda.
    pub lambda_max: f64,
}

impl Default for SplineSmoother {
    /// 20 interior knots and GCV over 60 log-spaced lambdas.
    fn default() -> Self {
        SplineSmoother {
            lambda_max: 1e6,
            lambda_min: 1e-12,
            n_lambda_grid: 60,
            n_interior_knots: 20,
            lambda: None,
        }
    }
}
impl SplineSmoother {
    /// Fits a penalized cubic B-spline to `(t, y)`.
    ///
    /// # Errors
    /// - `DimensionMismatch` when `t` and `y` differ in length.
    /// - `InsufficientData` for fewer than 4 observations.
    /// - Errors propagated from basis construction or the solver.
    pub fn fit(
        &self,
        t: &Array1<f64>,
        y: &Array1<f64>,
    ) -> Result<FunctionalData<BSplineBasis>> {
        self.check_inputs(t, y)?;
        let basis = BSplineBasis::uniform(self.n_interior_knots, 4)?;
        let pls = PenalizedLeastSquares {
            lambda: self.lambda,
            penalty_order: 2,
            n_lambda_grid: self.n_lambda_grid,
            lambda_min: self.lambda_min,
            lambda_max: self.lambda_max,
        };
        pls.fit(basis, t, y)
    }

    /// Same as `fit`, but also returns the smoothing parameter actually used
    /// (fixed or GCV-selected).
    pub fn fit_with_lambda(
        &self,
        t: &Array1<f64>,
        y: &Array1<f64>,
    ) -> Result<(FunctionalData<BSplineBasis>, f64)> {
        self.check_inputs(t, y)?;
        let n = t.len();
        let basis = BSplineBasis::uniform(self.n_interior_knots, 4)?;
        let phi = evaluate_basis_matrix(&basis, t)?;
        let penalty = basis.penalty_matrix(2)?;
        let lambda = match self.lambda {
            Some(lam) => lam,
            None => {
                GCV::select(self.lambda_min, self.lambda_max, self.n_lambda_grid, |lam| {
                    PenalizedLeastSquares::gcv_criterion(&phi, y, &penalty, lam, n)
                })?
                .optimal_lambda
            }
        };
        let coefficients = PenalizedLeastSquares::solve_penalized(&phi, y, &penalty, lambda)?;
        let fd = FunctionalData::new(basis, coefficients)?;
        Ok((fd, lambda))
    }

    /// Evaluates the GCV criterion over the lambda grid without fitting,
    /// returning `(lambdas, gcv_values)` in grid order.
    pub fn cross_validate(
        &self,
        t: &Array1<f64>,
        y: &Array1<f64>,
    ) -> Result<(Vec<f64>, Vec<f64>)> {
        self.check_inputs(t, y)?;
        let n = t.len();
        let basis = BSplineBasis::uniform(self.n_interior_knots, 4)?;
        let phi = evaluate_basis_matrix(&basis, t)?;
        let penalty = basis.penalty_matrix(2)?;
        let log_min = self.lambda_min.ln();
        let log_max = self.lambda_max.ln();
        // saturating_sub avoids usize underflow when n_lambda_grid == 0
        // (the loop below then simply yields empty vectors).
        let step = (log_max - log_min) / self.n_lambda_grid.saturating_sub(1).max(1) as f64;
        let mut lambdas = Vec::with_capacity(self.n_lambda_grid);
        let mut gcv_vals = Vec::with_capacity(self.n_lambda_grid);
        for k in 0..self.n_lambda_grid {
            let lam = (log_min + k as f64 * step).exp();
            let gcv = PenalizedLeastSquares::gcv_criterion(&phi, y, &penalty, lam, n)?;
            lambdas.push(lam);
            gcv_vals.push(gcv);
        }
        Ok((lambdas, gcv_vals))
    }

    /// Shared input validation: matching lengths (previously only checked in
    /// `fit`) and at least 4 observations for a cubic spline fit.
    fn check_inputs(&self, t: &Array1<f64>, y: &Array1<f64>) -> Result<()> {
        let n = t.len();
        if n != y.len() {
            return Err(TimeSeriesError::DimensionMismatch {
                expected: n,
                actual: y.len(),
            });
        }
        if n < 4 {
            return Err(TimeSeriesError::InsufficientData {
                message: "SplineSmoother requires at least 4 observations".to_string(),
                required: 4,
                actual: n,
            });
        }
        Ok(())
    }
}
/// Solves `a x = b` in the least-squares sense via `lstsq`, which stays
/// usable when `a` is singular or ill-conditioned.
///
/// # Errors
/// `ComputationError` when the underlying `lstsq` call fails.
pub(crate) fn solve_linear_system(a: &Array2<f64>, b: &Array1<f64>) -> Result<Array1<f64>> {
    let result = lstsq(&a.view(), &b.view(), None)
        .map_err(|e| TimeSeriesError::ComputationError(format!("lstsq failed: {}", e)))?;
    // `result.x` is already owned: move it out instead of cloning.
    Ok(result.x)
}
/// Solves `a X = B` column by column, reusing the vector solver.
///
/// # Errors
/// Propagates any failure from `solve_linear_system`.
pub(crate) fn solve_matrix_system(a: &Array2<f64>, b: &Array2<f64>) -> Result<Array2<f64>> {
    let k = b.ncols();
    let n = b.nrows();
    let mut x = Array2::zeros((n, k));
    for j in 0..k {
        let bj = b.column(j).to_owned();
        let xj = solve_linear_system(a, &bj)?;
        // Write the whole solution column at once instead of element-wise.
        x.column_mut(j).assign(&xj);
    }
    Ok(x)
}
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::Array1;

    /// Builds `n` points of sin(t) on `[0, 2*pi]` plus a deterministic
    /// high-frequency perturbation of amplitude `noise`.
    fn make_noisy_sine(n: usize, noise: f64) -> (Array1<f64>, Array1<f64>) {
        let t: Vec<f64> = (0..n)
            .map(|i| i as f64 / (n - 1) as f64 * 2.0 * std::f64::consts::PI)
            .collect();
        let y: Vec<f64> = t
            .iter()
            .map(|&ti| ti.sin() + noise * ((ti * 7.3).cos()))
            .collect();
        (Array1::from(t), Array1::from(y))
    }

    #[test]
    fn test_spline_smoother_basic() {
        let (t, y) = make_noisy_sine(50, 0.1);
        let smoother = SplineSmoother::default();
        let fd = smoother.fit(&t, &y).expect("smoother fit");
        // sin(pi) == 0, so the smoothed curve should be near zero there.
        let val = fd.eval(std::f64::consts::PI).expect("eval");
        assert!(val.abs() < 0.5);
    }

    #[test]
    fn test_kernel_smoother_nadaraya_watson() {
        let (t, y) = make_noisy_sine(60, 0.05);
        let smoother = KernelSmoother {
            bandwidth: Some(0.3),
            kernel: KernelType::Gaussian,
            poly_order: LocalPolynomialOrder::NadarayaWatson,
            n_bw_candidates: 10,
        };
        let fitted = smoother.fit(&t, &y).expect("kernel smoother fit");
        // Fitted values are produced at every observation point.
        assert_eq!(fitted.len(), t.len());
    }

    #[test]
    fn test_penalized_least_squares_fit() {
        let (t, y) = make_noisy_sine(40, 0.1);
        let basis = crate::functional::basis::BSplineBasis::uniform(8, 4).expect("basis");
        let pls = PenalizedLeastSquares {
            lambda: Some(1e-3),
            ..Default::default()
        };
        let fd = pls.fit(basis, &t, &y).expect("PLS fit");
        // One coefficient per basis function.
        assert_eq!(fd.coefficients.len(), fd.basis.n_basis());
    }

    #[test]
    fn test_functional_data_inner_product() {
        let basis = crate::functional::basis::BSplineBasis::uniform(5, 4).expect("basis");
        let n_basis = basis.n_basis();
        let c1 = Array1::from_elem(n_basis, 1.0);
        let c2 = Array1::from_elem(n_basis, 2.0);
        let fd1 = FunctionalData::new(basis.clone(), c1).expect("fd1");
        let fd2 = FunctionalData::new(basis, c2).expect("fd2");
        // Gram-matrix quadratic form of nonnegative curves must be positive.
        let ip = fd1.inner_product(&fd2).expect("inner product");
        assert!(ip > 0.0);
    }
}