use crate::error::FdarError;
use crate::slice_maybe_parallel;
#[cfg(feature = "parallel")]
use rayon::iter::ParallelIterator;
fn gaussian_kernel(u: f64) -> f64 {
(-0.5 * u * u).exp() / (2.0 * std::f64::consts::PI).sqrt()
}
fn epanechnikov_kernel(u: f64) -> f64 {
if u.abs() <= 1.0 {
0.75 * (1.0 - u * u)
} else {
0.0
}
}
fn tricube_kernel(u: f64) -> f64 {
let abs_u = u.abs();
if abs_u < 1.0 {
(1.0 - abs_u.powi(3)).powi(3)
} else {
0.0
}
}
fn get_kernel(kernel_type: &str) -> fn(f64) -> f64 {
match kernel_type.to_lowercase().as_str() {
"epanechnikov" | "epan" => epanechnikov_kernel,
"tricube" | "tri-cube" => tricube_kernel,
_ => gaussian_kernel,
}
}
pub fn nadaraya_watson(
x: &[f64],
y: &[f64],
x_new: &[f64],
bandwidth: f64,
kernel: &str,
) -> Result<Vec<f64>, FdarError> {
let n = x.len();
if n == 0 {
return Err(FdarError::InvalidDimension {
parameter: "x",
expected: "non-empty slice".to_string(),
actual: "empty".to_string(),
});
}
if y.len() != n {
return Err(FdarError::InvalidDimension {
parameter: "y",
expected: format!("length {n} (matching x)"),
actual: format!("length {}", y.len()),
});
}
if x_new.is_empty() {
return Err(FdarError::InvalidDimension {
parameter: "x_new",
expected: "non-empty slice".to_string(),
actual: "empty".to_string(),
});
}
if bandwidth <= 0.0 {
return Err(FdarError::InvalidParameter {
parameter: "bandwidth",
message: format!("must be positive, got {bandwidth}"),
});
}
let kernel_fn = get_kernel(kernel);
Ok(slice_maybe_parallel!(x_new)
.map(|&x0| {
let mut num = 0.0;
let mut denom = 0.0;
for i in 0..n {
let u = (x[i] - x0) / bandwidth;
let w = kernel_fn(u);
num += w * y[i];
denom += w;
}
if denom > 1e-10 {
num / denom
} else {
0.0
}
})
.collect())
}
pub fn local_linear(
x: &[f64],
y: &[f64],
x_new: &[f64],
bandwidth: f64,
kernel: &str,
) -> Result<Vec<f64>, FdarError> {
let n = x.len();
if n == 0 {
return Err(FdarError::InvalidDimension {
parameter: "x",
expected: "non-empty slice".to_string(),
actual: "empty".to_string(),
});
}
if y.len() != n {
return Err(FdarError::InvalidDimension {
parameter: "y",
expected: format!("length {n} (matching x)"),
actual: format!("length {}", y.len()),
});
}
if x_new.is_empty() {
return Err(FdarError::InvalidDimension {
parameter: "x_new",
expected: "non-empty slice".to_string(),
actual: "empty".to_string(),
});
}
if bandwidth <= 0.0 {
return Err(FdarError::InvalidParameter {
parameter: "bandwidth",
message: format!("must be positive, got {bandwidth}"),
});
}
let kernel_fn = get_kernel(kernel);
Ok(slice_maybe_parallel!(x_new)
.map(|&x0| {
let mut s0 = 0.0;
let mut s1 = 0.0;
let mut s2 = 0.0;
let mut t0 = 0.0;
let mut t1 = 0.0;
for i in 0..n {
let u = (x[i] - x0) / bandwidth;
let w = kernel_fn(u);
let d = x[i] - x0;
s0 += w;
s1 += w * d;
s2 += w * d * d;
t0 += w * y[i];
t1 += w * y[i] * d;
}
let det = s0 * s2 - s1 * s1;
if det.abs() > 1e-10 {
(s2 * t0 - s1 * t1) / det
} else if s0 > 1e-10 {
t0 / s0
} else {
0.0
}
})
.collect())
}
fn accumulate_weighted_normal_equations(
x: &[f64],
y: &[f64],
x0: f64,
bandwidth: f64,
p: usize,
kernel_fn: impl Fn(f64) -> f64,
) -> (Vec<f64>, Vec<f64>) {
let n = x.len();
let mut xtx = vec![0.0; p * p];
let mut xty = vec![0.0; p];
for i in 0..n {
let u = (x[i] - x0) / bandwidth;
let w = kernel_fn(u);
let d = x[i] - x0;
for j in 0..p {
let w_dj = w * d.powi(j as i32);
for k in 0..p {
xtx[j * p + k] += w_dj * d.powi(k as i32);
}
xty[j] += w_dj * y[i];
}
}
(xtx, xty)
}
fn find_pivot(a: &[f64], p: usize, col: usize) -> usize {
let mut max_idx = col;
for j in (col + 1)..p {
if a[j * p + col].abs() > a[max_idx * p + col].abs() {
max_idx = j;
}
}
max_idx
}
fn swap_rows(a: &mut [f64], b: &mut [f64], p: usize, row_a: usize, row_b: usize) {
for k in 0..p {
a.swap(row_a * p + k, row_b * p + k);
}
b.swap(row_a, row_b);
}
fn eliminate_below(a: &mut [f64], b: &mut [f64], p: usize, pivot_row: usize) {
let pivot = a[pivot_row * p + pivot_row];
for j in (pivot_row + 1)..p {
let factor = a[j * p + pivot_row] / pivot;
for k in pivot_row..p {
a[j * p + k] -= factor * a[pivot_row * p + k];
}
b[j] -= factor * b[pivot_row];
}
}
fn forward_eliminate(a: &mut [f64], b: &mut [f64], p: usize) {
for i in 0..p {
let max_idx = find_pivot(a, p, i);
if max_idx != i {
swap_rows(a, b, p, i, max_idx);
}
if a[i * p + i].abs() < 1e-10 {
continue;
}
eliminate_below(a, b, p, i);
}
}
fn back_substitute(a: &[f64], b: &[f64], p: usize) -> Vec<f64> {
let mut coefs = vec![0.0; p];
for i in (0..p).rev() {
let mut sum = b[i];
for j in (i + 1)..p {
sum -= a[i * p + j] * coefs[j];
}
if a[i * p + i].abs() > 1e-10 {
coefs[i] = sum / a[i * p + i];
}
}
coefs
}
fn solve_gaussian(a: &mut [f64], b: &mut [f64], p: usize) -> Vec<f64> {
forward_eliminate(a, b, p);
back_substitute(a, b, p)
}
pub fn solve_gaussian_pub(a: &mut [f64], b: &mut [f64], p: usize) -> Vec<f64> {
solve_gaussian(a, b, p)
}
pub fn local_polynomial(
x: &[f64],
y: &[f64],
x_new: &[f64],
bandwidth: f64,
degree: usize,
kernel: &str,
) -> Result<Vec<f64>, FdarError> {
let n = x.len();
if n == 0 {
return Err(FdarError::InvalidDimension {
parameter: "x",
expected: "non-empty slice".to_string(),
actual: "empty".to_string(),
});
}
if y.len() != n {
return Err(FdarError::InvalidDimension {
parameter: "y",
expected: format!("length {n} (matching x)"),
actual: format!("length {}", y.len()),
});
}
if x_new.is_empty() {
return Err(FdarError::InvalidDimension {
parameter: "x_new",
expected: "non-empty slice".to_string(),
actual: "empty".to_string(),
});
}
if bandwidth <= 0.0 {
return Err(FdarError::InvalidParameter {
parameter: "bandwidth",
message: format!("must be positive, got {bandwidth}"),
});
}
if degree == 0 {
return nadaraya_watson(x, y, x_new, bandwidth, kernel);
}
if degree == 1 {
return local_linear(x, y, x_new, bandwidth, kernel);
}
let kernel_fn = get_kernel(kernel);
let p = degree + 1;
Ok(slice_maybe_parallel!(x_new)
.map(|&x0| {
let (mut xtx, mut xty) =
accumulate_weighted_normal_equations(x, y, x0, bandwidth, p, kernel_fn);
let coefs = solve_gaussian(&mut xtx, &mut xty, p);
coefs[0]
})
.collect())
}
pub fn knn_smoother(x: &[f64], y: &[f64], x_new: &[f64], k: usize) -> Result<Vec<f64>, FdarError> {
let n = x.len();
if n == 0 {
return Err(FdarError::InvalidDimension {
parameter: "x",
expected: "non-empty slice".to_string(),
actual: "empty".to_string(),
});
}
if y.len() != n {
return Err(FdarError::InvalidDimension {
parameter: "y",
expected: format!("length {n} (matching x)"),
actual: format!("length {}", y.len()),
});
}
if x_new.is_empty() {
return Err(FdarError::InvalidDimension {
parameter: "x_new",
expected: "non-empty slice".to_string(),
actual: "empty".to_string(),
});
}
if k == 0 {
return Err(FdarError::InvalidParameter {
parameter: "k",
message: "must be at least 1".to_string(),
});
}
let k = k.min(n);
Ok(slice_maybe_parallel!(x_new)
.map(|&x0| {
let mut distances: Vec<(usize, f64)> = x
.iter()
.enumerate()
.map(|(i, &xi)| (i, (xi - x0).abs()))
.collect();
distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
let sum: f64 = distances.iter().take(k).map(|(i, _)| y[*i]).sum();
sum / k as f64
})
.collect())
}
pub fn smoothing_matrix_nw(x: &[f64], bandwidth: f64, kernel: &str) -> Result<Vec<f64>, FdarError> {
let n = x.len();
if n == 0 {
return Err(FdarError::InvalidDimension {
parameter: "x",
expected: "non-empty slice".to_string(),
actual: "empty".to_string(),
});
}
if bandwidth <= 0.0 {
return Err(FdarError::InvalidParameter {
parameter: "bandwidth",
message: format!("must be positive, got {bandwidth}"),
});
}
let kernel_fn = get_kernel(kernel);
let mut s = vec![0.0; n * n];
for i in 0..n {
let mut row_sum = 0.0;
for j in 0..n {
let u = (x[j] - x[i]) / bandwidth;
s[i + j * n] = kernel_fn(u);
row_sum += s[i + j * n];
}
if row_sum > 1e-10 {
for j in 0..n {
s[i + j * n] /= row_sum;
}
}
}
Ok(s)
}
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum CvCriterion {
Cv,
Gcv,
}
#[derive(Debug, Clone)]
pub struct OptimBandwidthResult {
pub h_opt: f64,
pub criterion: CvCriterion,
pub value: f64,
}
pub fn cv_smoother(x: &[f64], y: &[f64], bandwidth: f64, kernel: &str) -> f64 {
let n = x.len();
if n < 2 || y.len() != n || bandwidth <= 0.0 {
return f64::INFINITY;
}
let mut s = match smoothing_matrix_nw(x, bandwidth, kernel) {
Ok(s) => s,
Err(_) => return f64::INFINITY,
};
for i in 0..n {
s[i + i * n] = 0.0;
}
for i in 0..n {
let row_sum: f64 = (0..n).map(|j| s[i + j * n]).sum();
if row_sum > 1e-10 {
for j in 0..n {
s[i + j * n] /= row_sum;
}
}
}
let mut mse = 0.0;
for i in 0..n {
let y_hat: f64 = (0..n).map(|j| s[i + j * n] * y[j]).sum();
let resid = y[i] - y_hat;
mse += resid * resid;
}
mse / n as f64
}
pub fn gcv_smoother(x: &[f64], y: &[f64], bandwidth: f64, kernel: &str) -> f64 {
let n = x.len();
if n < 2 || y.len() != n || bandwidth <= 0.0 {
return f64::INFINITY;
}
let s = match smoothing_matrix_nw(x, bandwidth, kernel) {
Ok(s) => s,
Err(_) => return f64::INFINITY,
};
let mut rss = 0.0;
for i in 0..n {
let y_hat: f64 = (0..n).map(|j| s[i + j * n] * y[j]).sum();
let resid = y[i] - y_hat;
rss += resid * resid;
}
let trace_s: f64 = (0..n).map(|i| s[i + i * n]).sum();
let denom = 1.0 - trace_s / n as f64;
if denom.abs() < 1e-10 {
f64::INFINITY
} else {
(rss / n as f64) / (denom * denom)
}
}
pub fn optim_bandwidth(
x: &[f64],
y: &[f64],
h_range: Option<(f64, f64)>,
criterion: CvCriterion,
kernel: &str,
n_grid: usize,
) -> OptimBandwidthResult {
let n = x.len();
let n_grid = n_grid.max(2);
let (h_min, h_max) = match h_range {
Some((lo, hi)) if lo > 0.0 && hi > lo => (lo, hi),
_ => {
let x_min = x.iter().copied().fold(f64::INFINITY, f64::min);
let x_max = x.iter().copied().fold(f64::NEG_INFINITY, f64::max);
let h_default = (x_max - x_min) / (n as f64).powf(0.2);
let h_default = h_default.max(1e-10);
(h_default / 5.0, h_default * 5.0)
}
};
let score_fn = match criterion {
CvCriterion::Cv => cv_smoother,
CvCriterion::Gcv => gcv_smoother,
};
let mut best_h = h_min;
let mut best_score = f64::INFINITY;
for i in 0..n_grid {
let h = h_min + (h_max - h_min) * i as f64 / (n_grid - 1) as f64;
let score = score_fn(x, y, h, kernel);
if score < best_score {
best_score = score;
best_h = h;
}
}
OptimBandwidthResult {
h_opt: best_h,
criterion,
value: best_score,
}
}
#[derive(Debug, Clone)]
pub struct KnnCvResult {
pub optimal_k: usize,
pub cv_errors: Vec<f64>,
}
pub fn knn_gcv(x: &[f64], y: &[f64], max_k: usize) -> KnnCvResult {
let n = x.len();
let max_k = max_k.min(n.saturating_sub(1)).max(1);
let mut sorted_neighbors: Vec<Vec<(usize, f64)>> = Vec::with_capacity(n);
for i in 0..n {
let mut dists: Vec<(usize, f64)> = (0..n)
.filter(|&j| j != i)
.map(|j| (j, (x[j] - x[i]).abs()))
.collect();
dists.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
sorted_neighbors.push(dists);
}
let mut cv_errors = Vec::with_capacity(max_k);
for k in 1..=max_k {
let mut mse = 0.0;
for i in 0..n {
let neighbors = &sorted_neighbors[i];
let d_k = if k <= neighbors.len() {
neighbors[k - 1].1
} else {
neighbors.last().map_or(1.0, |x| x.1)
};
let d_k1 = if k < neighbors.len() {
neighbors[k].1
} else {
d_k * 2.0
};
let h = (d_k + d_k1) / 2.0;
let h = h.max(1e-10);
let mut num = 0.0;
let mut den = 0.0;
for &(j, dist) in neighbors.iter().take(k) {
let u = dist / h;
let w = epanechnikov_kernel(u);
num += w * y[j];
den += w;
}
let y_hat = if den > 1e-10 { num / den } else { y[i] };
mse += (y[i] - y_hat).powi(2);
}
cv_errors.push(mse / n as f64);
}
let optimal_k = cv_errors
.iter()
.enumerate()
.min_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
.map_or(1, |(i, _)| i + 1);
KnnCvResult {
optimal_k,
cv_errors,
}
}
pub fn knn_lcv(x: &[f64], y: &[f64], max_k: usize) -> Vec<usize> {
let n = x.len();
let max_k = max_k.min(n.saturating_sub(1)).max(1);
let mut per_obs_k = vec![1usize; n];
for i in 0..n {
let mut neighbors: Vec<(usize, f64)> = (0..n)
.filter(|&j| j != i)
.map(|j| (j, (x[j] - x[i]).abs()))
.collect();
neighbors.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
let mut best_k = 1;
let mut best_err = f64::INFINITY;
for k in 1..=max_k {
let sum: f64 = neighbors.iter().take(k).map(|&(j, _)| y[j]).sum();
let y_hat = sum / k as f64;
let err = (y[i] - y_hat).abs();
if err < best_err {
best_err = err;
best_k = k;
}
}
per_obs_k[i] = best_k;
}
per_obs_k
}
#[cfg(test)]
mod tests {
use super::*;
use crate::test_helpers::uniform_grid;
#[test]
fn test_nw_constant_data() {
let x = uniform_grid(20);
let y: Vec<f64> = vec![5.0; 20];
let y_smooth = nadaraya_watson(&x, &y, &x, 0.1, "gaussian").unwrap();
for &yi in &y_smooth {
assert!(
(yi - 5.0).abs() < 0.1,
"Constant data should remain constant"
);
}
}
#[test]
fn test_nw_linear_data() {
let x = uniform_grid(50);
let y: Vec<f64> = x.iter().map(|&xi| 2.0 * xi + 1.0).collect();
let y_smooth = nadaraya_watson(&x, &y, &x, 0.2, "gaussian").unwrap();
for i in 10..40 {
let expected = 2.0 * x[i] + 1.0;
assert!(
(y_smooth[i] - expected).abs() < 0.2,
"Linear trend should be approximately preserved"
);
}
}
#[test]
fn test_nw_gaussian_vs_epanechnikov() {
let x = uniform_grid(30);
let y: Vec<f64> = x
.iter()
.map(|&xi| (2.0 * std::f64::consts::PI * xi).sin())
.collect();
let y_gauss = nadaraya_watson(&x, &y, &x, 0.1, "gaussian").unwrap();
let y_epan = nadaraya_watson(&x, &y, &x, 0.1, "epanechnikov").unwrap();
assert_eq!(y_gauss.len(), 30);
assert_eq!(y_epan.len(), 30);
let diff: f64 = y_gauss
.iter()
.zip(&y_epan)
.map(|(a, b)| (a - b).abs())
.sum();
assert!(
diff > 0.0,
"Different kernels should give different results"
);
}
#[test]
fn test_nw_invalid_input() {
assert!(nadaraya_watson(&[], &[], &[0.5], 0.1, "gaussian").is_err());
assert!(nadaraya_watson(&[0.0, 1.0], &[1.0], &[0.5], 0.1, "gaussian").is_err());
assert!(nadaraya_watson(&[0.0, 1.0], &[1.0, 2.0], &[0.5], 0.0, "gaussian").is_err());
assert!(nadaraya_watson(&[0.0, 1.0], &[1.0, 2.0], &[], 0.1, "gaussian").is_err());
}
#[test]
fn test_ll_constant_data() {
let x = uniform_grid(20);
let y: Vec<f64> = vec![3.0; 20];
let y_smooth = local_linear(&x, &y, &x, 0.15, "gaussian").unwrap();
for &yi in &y_smooth {
assert!((yi - 3.0).abs() < 0.1, "Constant should remain constant");
}
}
#[test]
fn test_ll_linear_data_exact() {
let x = uniform_grid(30);
let y: Vec<f64> = x.iter().map(|&xi| 3.0 * xi + 2.0).collect();
let y_smooth = local_linear(&x, &y, &x, 0.2, "gaussian").unwrap();
for i in 5..25 {
let expected = 3.0 * x[i] + 2.0;
assert!(
(y_smooth[i] - expected).abs() < 0.1,
"Local linear should fit linear data well"
);
}
}
#[test]
fn test_ll_invalid_input() {
assert!(local_linear(&[], &[], &[0.5], 0.1, "gaussian").is_err());
assert!(local_linear(&[0.0, 1.0], &[1.0, 2.0], &[0.5], -0.1, "gaussian").is_err());
}
#[test]
fn test_lp_degree1_equals_local_linear() {
let x = uniform_grid(25);
let y: Vec<f64> = x.iter().map(|&xi| xi * xi).collect();
let y_ll = local_linear(&x, &y, &x, 0.15, "gaussian").unwrap();
let y_lp = local_polynomial(&x, &y, &x, 0.15, 1, "gaussian").unwrap();
for i in 0..25 {
assert!(
(y_ll[i] - y_lp[i]).abs() < 1e-10,
"Degree 1 should equal local linear"
);
}
}
#[test]
fn test_lp_quadratic_data() {
let x = uniform_grid(40);
let y: Vec<f64> = x.iter().map(|&xi| xi * xi).collect();
let y_smooth = local_polynomial(&x, &y, &x, 0.15, 2, "gaussian").unwrap();
for i in 8..32 {
let expected = x[i] * x[i];
assert!(
(y_smooth[i] - expected).abs() < 0.1,
"Local quadratic should fit quadratic data"
);
}
}
#[test]
fn test_lp_invalid_input() {
let result =
local_polynomial(&[0.0, 1.0], &[1.0, 2.0], &[0.5], 0.1, 0, "gaussian").unwrap();
let nw = nadaraya_watson(&[0.0, 1.0], &[1.0, 2.0], &[0.5], 0.1, "gaussian").unwrap();
assert_eq!(result, nw);
assert!(local_polynomial(&[], &[], &[0.5], 0.1, 2, "gaussian").is_err());
}
#[test]
fn test_knn_k1_nearest() {
let x = vec![0.0, 0.5, 1.0];
let y = vec![1.0, 2.0, 3.0];
let result = knn_smoother(&x, &y, &[0.1, 0.6, 0.9], 1).unwrap();
assert!((result[0] - 1.0).abs() < 1e-10, "0.1 nearest to 0.0 -> 1.0");
assert!((result[1] - 2.0).abs() < 1e-10, "0.6 nearest to 0.5 -> 2.0");
assert!((result[2] - 3.0).abs() < 1e-10, "0.9 nearest to 1.0 -> 3.0");
}
#[test]
fn test_knn_k_equals_n_is_mean() {
let x = vec![0.0, 0.25, 0.5, 0.75, 1.0];
let y = vec![1.0, 2.0, 3.0, 4.0, 5.0];
let expected_mean = 3.0;
let result = knn_smoother(&x, &y, &[0.5], 5).unwrap();
assert!(
(result[0] - expected_mean).abs() < 1e-10,
"k=n should return mean"
);
}
#[test]
fn test_knn_invalid_input() {
assert!(knn_smoother(&[], &[], &[0.5], 3).is_err());
assert!(knn_smoother(&[0.0, 1.0], &[1.0, 2.0], &[0.5], 0).is_err());
}
#[test]
fn test_smoothing_matrix_row_stochastic() {
let x = uniform_grid(10);
let s = smoothing_matrix_nw(&x, 0.2, "gaussian").unwrap();
assert_eq!(s.len(), 100);
for i in 0..10 {
let row_sum: f64 = (0..10).map(|j| s[i + j * 10]).sum();
assert!(
(row_sum - 1.0).abs() < 1e-10,
"Row {} should sum to 1, got {}",
i,
row_sum
);
}
}
#[test]
fn test_smoothing_matrix_invalid_input() {
assert!(smoothing_matrix_nw(&[], 0.1, "gaussian").is_err());
assert!(smoothing_matrix_nw(&[0.0, 1.0], 0.0, "gaussian").is_err());
}
#[test]
fn test_nan_nw_no_panic() {
let x = vec![0.0, 0.25, 0.5, 0.75, 1.0];
let mut y = vec![0.0, 1.0, 2.0, 1.0, 0.0];
y[2] = f64::NAN;
let result = nadaraya_watson(&x, &y, &x, 0.3, "gaussian").unwrap();
assert_eq!(result.len(), x.len());
}
#[test]
fn test_n1_smoother() {
let x = vec![0.5];
let y = vec![3.0];
let x_new = vec![0.5];
let result = nadaraya_watson(&x, &y, &x_new, 0.3, "gaussian").unwrap();
assert_eq!(result.len(), 1);
assert!(
(result[0] - 3.0).abs() < 1e-6,
"Single point smoother should return the value"
);
}
#[test]
fn test_duplicate_x_smoother() {
let x = vec![0.0, 0.0, 0.5, 1.0, 1.0];
let y = vec![1.0, 2.0, 3.0, 4.0, 5.0];
let x_new = vec![0.0, 0.5, 1.0];
let result = nadaraya_watson(&x, &y, &x_new, 0.3, "gaussian").unwrap();
assert_eq!(result.len(), 3);
for v in &result {
assert!(v.is_finite());
}
}
#[test]
fn test_cv_smoother_linear_data() {
let x = uniform_grid(30);
let y: Vec<f64> = x.iter().map(|&xi| 2.0 * xi + 1.0).collect();
let cv = cv_smoother(&x, &y, 0.2, "gaussian");
assert!(cv.is_finite());
assert!(cv >= 0.0);
assert!(cv < 1.0, "CV error for smooth linear data should be small");
}
#[test]
fn test_cv_smoother_invalid() {
assert_eq!(cv_smoother(&[], &[], 0.1, "gaussian"), f64::INFINITY);
assert_eq!(
cv_smoother(&[0.0, 1.0], &[1.0, 2.0], -0.1, "gaussian"),
f64::INFINITY
);
}
#[test]
fn test_gcv_smoother_linear_data() {
let x = uniform_grid(30);
let y: Vec<f64> = x.iter().map(|&xi| 2.0 * xi + 1.0).collect();
let gcv = gcv_smoother(&x, &y, 0.2, "gaussian");
assert!(gcv.is_finite());
assert!(gcv >= 0.0);
}
#[test]
fn test_gcv_smoother_invalid() {
assert_eq!(gcv_smoother(&[], &[], 0.1, "gaussian"), f64::INFINITY);
}
#[test]
fn test_optim_bandwidth_returns_valid() {
let x = uniform_grid(30);
let y: Vec<f64> = x
.iter()
.map(|&xi| (2.0 * std::f64::consts::PI * xi).sin())
.collect();
let result = optim_bandwidth(&x, &y, None, CvCriterion::Gcv, "gaussian", 20);
assert!(result.h_opt > 0.0);
assert!(result.value.is_finite());
assert_eq!(result.criterion, CvCriterion::Gcv);
}
#[test]
fn test_optim_bandwidth_cv_vs_gcv() {
let x = uniform_grid(25);
let y: Vec<f64> = x.iter().map(|&xi| xi * xi).collect();
let cv_result = optim_bandwidth(&x, &y, None, CvCriterion::Cv, "gaussian", 20);
let gcv_result = optim_bandwidth(&x, &y, None, CvCriterion::Gcv, "gaussian", 20);
assert!(cv_result.h_opt > 0.0);
assert!(gcv_result.h_opt > 0.0);
}
#[test]
fn test_optim_bandwidth_custom_range() {
let x = uniform_grid(20);
let y: Vec<f64> = x.to_vec();
let result = optim_bandwidth(&x, &y, Some((0.05, 0.5)), CvCriterion::Cv, "gaussian", 10);
assert!(result.h_opt >= 0.05);
assert!(result.h_opt <= 0.5);
}
#[test]
fn test_knn_gcv_returns_valid() {
let x = uniform_grid(20);
let y: Vec<f64> = x.iter().map(|&xi| xi * xi).collect();
let result = knn_gcv(&x, &y, 10);
assert!(result.optimal_k >= 1);
assert!(result.optimal_k <= 10);
assert_eq!(result.cv_errors.len(), 10);
for &e in &result.cv_errors {
assert!(e.is_finite());
assert!(e >= 0.0);
}
}
#[test]
fn test_knn_gcv_constant_data() {
let x = uniform_grid(15);
let y = vec![5.0; 15];
let result = knn_gcv(&x, &y, 5);
for &e in &result.cv_errors {
assert!(e < 0.01, "Constant data: CV error should be near zero");
}
}
#[test]
fn test_knn_lcv_returns_valid() {
let x = uniform_grid(15);
let y: Vec<f64> = x.to_vec();
let result = knn_lcv(&x, &y, 5);
assert_eq!(result.len(), 15);
for &k in &result {
assert!(k >= 1);
assert!(k <= 5);
}
}
#[test]
fn test_knn_lcv_constant_data() {
let x = uniform_grid(10);
let y = vec![3.0; 10];
let result = knn_lcv(&x, &y, 5);
assert_eq!(result.len(), 10);
for &k in &result {
assert!(k >= 1);
}
}
#[test]
fn test_tricube_kernel_values() {
let k0 = tricube_kernel(0.0);
assert!((k0 - 1.0).abs() < 1e-10, "tricube(0) should be 1.0");
assert_eq!(tricube_kernel(1.0), 0.0, "tricube(1) should be 0");
assert_eq!(tricube_kernel(-1.0), 0.0, "tricube(-1) should be 0");
assert_eq!(tricube_kernel(2.0), 0.0, "tricube(2) should be 0");
let k05 = tricube_kernel(0.5);
assert!(k05 > 0.0 && k05 < 1.0, "tricube(0.5) should be in (0, 1)");
}
#[test]
fn test_nw_tricube_constant_data() {
let x = uniform_grid(20);
let y = vec![5.0; 20];
let y_smooth = nadaraya_watson(&x, &y, &x, 0.2, "tricube").unwrap();
for &yi in &y_smooth {
assert!(
(yi - 5.0).abs() < 0.1,
"Tricube NW: constant data should remain constant"
);
}
}
#[test]
fn test_nw_tricube_linear_data() {
let x = uniform_grid(50);
let y: Vec<f64> = x.iter().map(|&xi| 2.0 * xi + 1.0).collect();
let y_smooth = nadaraya_watson(&x, &y, &x, 0.2, "tricube").unwrap();
for i in 10..40 {
let expected = 2.0 * x[i] + 1.0;
assert!(
(y_smooth[i] - expected).abs() < 0.3,
"Tricube NW: linear trend should be approximately preserved at i={i}"
);
}
}
#[test]
fn test_nw_tricube_vs_gaussian() {
let x = uniform_grid(30);
let y: Vec<f64> = x
.iter()
.map(|&xi| (2.0 * std::f64::consts::PI * xi).sin())
.collect();
let y_gauss = nadaraya_watson(&x, &y, &x, 0.15, "gaussian").unwrap();
let y_tri = nadaraya_watson(&x, &y, &x, 0.15, "tricube").unwrap();
assert_eq!(y_gauss.len(), y_tri.len());
assert!(y_tri.iter().all(|v| v.is_finite()));
let diff: f64 = y_gauss.iter().zip(&y_tri).map(|(a, b)| (a - b).abs()).sum();
assert!(
diff > 0.0,
"Gaussian and tricube kernels should give different results"
);
}
#[test]
fn test_nw_tricube_vs_epanechnikov() {
let x = uniform_grid(30);
let y: Vec<f64> = x
.iter()
.map(|&xi| (2.0 * std::f64::consts::PI * xi).sin())
.collect();
let y_epan = nadaraya_watson(&x, &y, &x, 0.15, "epanechnikov").unwrap();
let y_tri = nadaraya_watson(&x, &y, &x, 0.15, "tricube").unwrap();
assert!(y_epan.iter().all(|v| v.is_finite()));
assert!(y_tri.iter().all(|v| v.is_finite()));
let diff: f64 = y_epan.iter().zip(&y_tri).map(|(a, b)| (a - b).abs()).sum();
assert!(
diff > 0.0,
"Epanechnikov and tricube should give different results"
);
}
#[test]
fn test_ll_tricube_constant_data() {
let x = uniform_grid(20);
let y = vec![3.0; 20];
let y_smooth = local_linear(&x, &y, &x, 0.2, "tricube").unwrap();
for &yi in &y_smooth {
assert!(
(yi - 3.0).abs() < 0.1,
"Tricube LL: constant should remain constant"
);
}
}
#[test]
fn test_ll_tricube_linear_data() {
let x = uniform_grid(30);
let y: Vec<f64> = x.iter().map(|&xi| 3.0 * xi + 2.0).collect();
let y_smooth = local_linear(&x, &y, &x, 0.2, "tricube").unwrap();
for i in 5..25 {
let expected = 3.0 * x[i] + 2.0;
assert!(
(y_smooth[i] - expected).abs() < 0.2,
"Tricube LL: should fit linear data well at i={i}"
);
}
}
#[test]
fn test_ll_tricube_vs_gaussian() {
let x = uniform_grid(30);
let y: Vec<f64> = x.iter().map(|&xi| xi * xi).collect();
let y_gauss = local_linear(&x, &y, &x, 0.15, "gaussian").unwrap();
let y_tri = local_linear(&x, &y, &x, 0.15, "tricube").unwrap();
assert_eq!(y_gauss.len(), y_tri.len());
assert!(y_tri.iter().all(|v| v.is_finite()));
let diff: f64 = y_gauss.iter().zip(&y_tri).map(|(a, b)| (a - b).abs()).sum();
assert!(
diff > 0.0,
"Gaussian and tricube local linear should differ"
);
}
#[test]
fn test_lp_tricube_quadratic() {
let x = uniform_grid(40);
let y: Vec<f64> = x.iter().map(|&xi| xi * xi).collect();
let y_smooth = local_polynomial(&x, &y, &x, 0.15, 2, "tricube").unwrap();
for i in 8..32 {
let expected = x[i] * x[i];
assert!(
(y_smooth[i] - expected).abs() < 0.15,
"Tricube LP: should fit quadratic data at i={i}"
);
}
}
#[test]
fn test_get_kernel_tricube_aliases() {
let k1 = get_kernel("tricube");
let k2 = get_kernel("tri-cube");
let test_val = 0.5;
assert!(
(k1(test_val) - k2(test_val)).abs() < 1e-15,
"Both tricube aliases should give the same result"
);
}
#[test]
fn test_smoothing_matrix_tricube() {
let x = uniform_grid(10);
let s = smoothing_matrix_nw(&x, 0.2, "tricube").unwrap();
assert_eq!(s.len(), 100);
for i in 0..10 {
let row_sum: f64 = (0..10).map(|j| s[i + j * 10]).sum();
assert!(
(row_sum - 1.0).abs() < 1e-10,
"Tricube: row {} should sum to 1, got {}",
i,
row_sum
);
}
}
#[test]
fn test_cv_smoother_tricube() {
let x = uniform_grid(30);
let y: Vec<f64> = x.iter().map(|&xi| 2.0 * xi + 1.0).collect();
let cv = cv_smoother(&x, &y, 0.2, "tricube");
assert!(cv.is_finite());
assert!(cv >= 0.0);
}
#[test]
fn test_optim_bandwidth_tricube() {
let x = uniform_grid(25);
let y: Vec<f64> = x
.iter()
.map(|&xi| (2.0 * std::f64::consts::PI * xi).sin())
.collect();
let result = optim_bandwidth(&x, &y, None, CvCriterion::Gcv, "tricube", 20);
assert!(result.h_opt > 0.0);
assert!(result.value.is_finite());
}
}