Skip to main content

fdars_core/
smoothing.rs

1//! Smoothing functions for functional data.
2//!
3//! This module provides kernel-based smoothing methods including
4//! Nadaraya-Watson, local linear, and local polynomial regression.
5
6use crate::slice_maybe_parallel;
7#[cfg(feature = "parallel")]
8use rayon::iter::ParallelIterator;
9
/// Standard Gaussian kernel: K(u) = exp(-u²/2) / √(2π).
fn gaussian_kernel(u: f64) -> f64 {
    // Normalizing constant of the standard normal density.
    let norm = (2.0 * std::f64::consts::PI).sqrt();
    (-(u * u) / 2.0).exp() / norm
}
14
/// Epanechnikov kernel: K(u) = ¾·(1 − u²) on [−1, 1], zero outside.
fn epanechnikov_kernel(u: f64) -> f64 {
    // `<=` keeps NaN inputs in the zero branch (NaN <= 1.0 is false).
    match u.abs() <= 1.0 {
        true => 0.75 * (1.0 - u * u),
        false => 0.0,
    }
}
23
/// Tri-cube kernel: K(u) = (1 − |u|³)³ on (−1, 1), zero outside
/// (the weighting used by R's loess()).
fn tricube_kernel(u: f64) -> f64 {
    let t = u.abs();
    match t < 1.0 {
        true => {
            let core = 1.0 - t * t * t;
            core * core * core
        }
        false => 0.0,
    }
}
33
34/// Get kernel function by name.
35fn get_kernel(kernel_type: &str) -> fn(f64) -> f64 {
36    match kernel_type.to_lowercase().as_str() {
37        "epanechnikov" | "epan" => epanechnikov_kernel,
38        "tricube" | "tri-cube" => tricube_kernel,
39        _ => gaussian_kernel,
40    }
41}
42
43/// Nadaraya-Watson kernel smoother.
44///
45/// # Arguments
46/// * `x` - Predictor values
47/// * `y` - Response values
48/// * `x_new` - Points at which to evaluate the smoother
49/// * `bandwidth` - Kernel bandwidth
50/// * `kernel` - Kernel type ("gaussian" or "epanechnikov")
51///
52/// # Returns
53/// Smoothed values at x_new
54pub fn nadaraya_watson(
55    x: &[f64],
56    y: &[f64],
57    x_new: &[f64],
58    bandwidth: f64,
59    kernel: &str,
60) -> Vec<f64> {
61    let n = x.len();
62    if n == 0 || y.len() != n || x_new.is_empty() || bandwidth <= 0.0 {
63        return vec![0.0; x_new.len()];
64    }
65
66    let kernel_fn = get_kernel(kernel);
67
68    slice_maybe_parallel!(x_new)
69        .map(|&x0| {
70            let mut num = 0.0;
71            let mut denom = 0.0;
72
73            for i in 0..n {
74                let u = (x[i] - x0) / bandwidth;
75                let w = kernel_fn(u);
76                num += w * y[i];
77                denom += w;
78            }
79
80            if denom > 1e-10 {
81                num / denom
82            } else {
83                0.0
84            }
85        })
86        .collect()
87}
88
89/// Local linear regression smoother.
90///
91/// # Arguments
92/// * `x` - Predictor values
93/// * `y` - Response values
94/// * `x_new` - Points at which to evaluate the smoother
95/// * `bandwidth` - Kernel bandwidth
96/// * `kernel` - Kernel type
97///
98/// # Returns
99/// Smoothed values at x_new
100pub fn local_linear(x: &[f64], y: &[f64], x_new: &[f64], bandwidth: f64, kernel: &str) -> Vec<f64> {
101    let n = x.len();
102    if n == 0 || y.len() != n || x_new.is_empty() || bandwidth <= 0.0 {
103        return vec![0.0; x_new.len()];
104    }
105
106    let kernel_fn = get_kernel(kernel);
107
108    slice_maybe_parallel!(x_new)
109        .map(|&x0| {
110            // Compute weighted moments
111            let mut s0 = 0.0;
112            let mut s1 = 0.0;
113            let mut s2 = 0.0;
114            let mut t0 = 0.0;
115            let mut t1 = 0.0;
116
117            for i in 0..n {
118                let u = (x[i] - x0) / bandwidth;
119                let w = kernel_fn(u);
120                let d = x[i] - x0;
121
122                s0 += w;
123                s1 += w * d;
124                s2 += w * d * d;
125                t0 += w * y[i];
126                t1 += w * y[i] * d;
127            }
128
129            // Solve local linear regression
130            let det = s0 * s2 - s1 * s1;
131            if det.abs() > 1e-10 {
132                (s2 * t0 - s1 * t1) / det
133            } else if s0 > 1e-10 {
134                t0 / s0
135            } else {
136                0.0
137            }
138        })
139        .collect()
140}
141
/// Accumulate weighted normal equations (XᵀWX and XᵀWy) for a local
/// polynomial fit of order `p − 1` centered at `x0`.
///
/// With dᵢ = xᵢ − x0 and wᵢ = K(dᵢ / bandwidth):
/// * `xtx[j*p + k]` = Σᵢ wᵢ·dᵢ^(j+k)  (p×p, row-major)
/// * `xty[j]`       = Σᵢ wᵢ·dᵢ^j·yᵢ
///
/// `x` and `y` are paired element-wise; callers are expected to pass
/// slices of equal length.
fn accumulate_weighted_normal_equations(
    x: &[f64],
    y: &[f64],
    x0: f64,
    bandwidth: f64,
    p: usize,
    kernel_fn: impl Fn(f64) -> f64,
) -> (Vec<f64>, Vec<f64>) {
    let mut xtx = vec![0.0; p * p];
    let mut xty = vec![0.0; p];
    if p == 0 {
        // Nothing to accumulate; also avoids the 2*p - 1 underflow below.
        return (xtx, xty);
    }

    // Power table d⁰ .. d^(2p−2), rebuilt per observation. Filling it once
    // per point replaces the O(p²) `powi` calls of the naive formulation
    // with O(p) multiplications.
    let mut powers = vec![0.0; 2 * p - 1];

    for (&xi, &yi) in x.iter().zip(y) {
        let d = xi - x0;
        let w = kernel_fn(d / bandwidth);

        powers[0] = 1.0;
        for m in 1..powers.len() {
            powers[m] = powers[m - 1] * d;
        }

        for j in 0..p {
            let w_dj = w * powers[j];
            for k in 0..p {
                xtx[j * p + k] += w_dj * powers[k];
            }
            xty[j] += w_dj * yi;
        }
    }

    (xtx, xty)
}
171
/// Partial-pivot selection: index of the row with the largest absolute
/// entry in column `col`, scanning from the diagonal downward.
/// Ties keep the earliest row (strict `>` comparison).
fn find_pivot(a: &[f64], p: usize, col: usize) -> usize {
    let mut best = col;
    let mut best_abs = a[col * p + col].abs();
    for row in (col + 1)..p {
        let candidate = a[row * p + col].abs();
        if candidate > best_abs {
            best = row;
            best_abs = candidate;
        }
    }
    best
}
185
/// Exchange rows `row_a` and `row_b` of the p×p row-major matrix `a`,
/// mirroring the swap in the right-hand side vector `b`.
fn swap_rows(a: &mut [f64], b: &mut [f64], p: usize, row_a: usize, row_b: usize) {
    let (base_a, base_b) = (row_a * p, row_b * p);
    for col in 0..p {
        a.swap(base_a + col, base_b + col);
    }
    b.swap(row_a, row_b);
}
193
/// Zero out column `pivot_row` beneath the diagonal by subtracting the
/// scaled pivot row from every row below it (same operation applied to `b`).
fn eliminate_below(a: &mut [f64], b: &mut [f64], p: usize, pivot_row: usize) {
    let pivot_base = pivot_row * p;
    let pivot = a[pivot_base + pivot_row];
    for row in (pivot_row + 1)..p {
        let row_base = row * p;
        let factor = a[row_base + pivot_row] / pivot;
        // Columns left of the pivot are already zero; start at the pivot.
        for col in pivot_row..p {
            a[row_base + col] -= factor * a[pivot_base + col];
        }
        b[row] -= factor * b[pivot_row];
    }
}
205
206fn forward_eliminate(a: &mut [f64], b: &mut [f64], p: usize) {
207    for i in 0..p {
208        let max_idx = find_pivot(a, p, i);
209        if max_idx != i {
210            swap_rows(a, b, p, i, max_idx);
211        }
212
213        if a[i * p + i].abs() < 1e-10 {
214            continue;
215        }
216
217        eliminate_below(a, b, p, i);
218    }
219}
220
/// Back substitution for an upper-triangular system `a·x = b`.
/// Rows with a numerically zero diagonal leave their coefficient at 0.0.
fn back_substitute(a: &[f64], b: &[f64], p: usize) -> Vec<f64> {
    let mut solution = vec![0.0; p];
    for row in (0..p).rev() {
        // Subtract already-solved terms from the right-hand side.
        let mut remainder = b[row];
        for col in (row + 1)..p {
            remainder -= a[row * p + col] * solution[col];
        }
        let diag = a[row * p + row];
        if diag.abs() > 1e-10 {
            solution[row] = remainder / diag;
        }
    }
    solution
}
235
/// Solve the p×p linear system `a·x = b` in place: forward elimination with
/// partial pivoting, then back substitution. Near-singular pivots are
/// skipped, leaving the affected solution entries at 0.0.
fn solve_gaussian(a: &mut [f64], b: &mut [f64], p: usize) -> Vec<f64> {
    forward_eliminate(a, b, p);
    back_substitute(a, b, p)
}
240
/// Solve a linear system Ax = b via Gaussian elimination with partial pivoting.
///
/// Public wrapper for use by other modules (e.g., `fregre_basis_cv`).
///
/// # Arguments
/// * `a` — p×p coefficient matrix in row-major order; overwritten with its
///   upper-triangular reduction.
/// * `b` — Right-hand side of length p; overwritten during elimination.
/// * `p` — System dimension.
///
/// # Returns
/// Solution vector of length p; entries whose pivot is numerically zero are
/// returned as 0.0 rather than signalling an error.
pub fn solve_gaussian_pub(a: &mut [f64], b: &mut [f64], p: usize) -> Vec<f64> {
    solve_gaussian(a, b, p)
}
249
250/// Local polynomial regression smoother.
251///
252/// # Arguments
253/// * `x` - Predictor values
254/// * `y` - Response values
255/// * `x_new` - Points at which to evaluate the smoother
256/// * `bandwidth` - Kernel bandwidth
257/// * `degree` - Polynomial degree
258/// * `kernel` - Kernel type
259///
260/// # Returns
261/// Smoothed values at x_new
262pub fn local_polynomial(
263    x: &[f64],
264    y: &[f64],
265    x_new: &[f64],
266    bandwidth: f64,
267    degree: usize,
268    kernel: &str,
269) -> Vec<f64> {
270    let n = x.len();
271    if n == 0 || y.len() != n || x_new.is_empty() || bandwidth <= 0.0 {
272        return vec![0.0; x_new.len()];
273    }
274    if degree == 0 {
275        return nadaraya_watson(x, y, x_new, bandwidth, kernel);
276    }
277
278    if degree == 1 {
279        return local_linear(x, y, x_new, bandwidth, kernel);
280    }
281
282    let kernel_fn = get_kernel(kernel);
283    let p = degree + 1; // Number of coefficients
284
285    slice_maybe_parallel!(x_new)
286        .map(|&x0| {
287            let (mut xtx, mut xty) =
288                accumulate_weighted_normal_equations(x, y, x0, bandwidth, p, kernel_fn);
289            let coefs = solve_gaussian(&mut xtx, &mut xty, p);
290            coefs[0]
291        })
292        .collect()
293}
294
295/// k-Nearest Neighbors smoother.
296///
297/// # Arguments
298/// * `x` - Predictor values
299/// * `y` - Response values
300/// * `x_new` - Points at which to evaluate the smoother
301/// * `k` - Number of neighbors
302///
303/// # Returns
304/// Smoothed values at x_new
305pub fn knn_smoother(x: &[f64], y: &[f64], x_new: &[f64], k: usize) -> Vec<f64> {
306    let n = x.len();
307    if n == 0 || y.len() != n || x_new.is_empty() || k == 0 {
308        return vec![0.0; x_new.len()];
309    }
310
311    let k = k.min(n);
312
313    slice_maybe_parallel!(x_new)
314        .map(|&x0| {
315            // Compute distances
316            let mut distances: Vec<(usize, f64)> = x
317                .iter()
318                .enumerate()
319                .map(|(i, &xi)| (i, (xi - x0).abs()))
320                .collect();
321
322            // Partial sort to get k nearest
323            distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
324
325            // Average of k nearest neighbors
326            let sum: f64 = distances.iter().take(k).map(|(i, _)| y[*i]).sum();
327            sum / k as f64
328        })
329        .collect()
330}
331
332/// Compute smoothing matrix for Nadaraya-Watson.
333///
334/// Returns the smoother matrix S such that y_hat = S * y.
335pub fn smoothing_matrix_nw(x: &[f64], bandwidth: f64, kernel: &str) -> Vec<f64> {
336    let n = x.len();
337    if n == 0 || bandwidth <= 0.0 {
338        return Vec::new();
339    }
340
341    let kernel_fn = get_kernel(kernel);
342    let mut s = vec![0.0; n * n];
343
344    for i in 0..n {
345        let mut row_sum = 0.0;
346        for j in 0..n {
347            let u = (x[j] - x[i]) / bandwidth;
348            s[i + j * n] = kernel_fn(u);
349            row_sum += s[i + j * n];
350        }
351        if row_sum > 1e-10 {
352            for j in 0..n {
353                s[i + j * n] /= row_sum;
354            }
355        }
356    }
357
358    s
359}
360
361// ─── Cross-Validation for Kernel Smoothers ──────────────────────────────────
362
/// CV criterion type for bandwidth selection.
// `Eq` and `Hash` are derived alongside `PartialEq`: the enum is fieldless,
// so equality is total, and hashing lets callers key maps/sets by criterion.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum CvCriterion {
    /// Leave-one-out cross-validation (R's `CV.S`).
    Cv,
    /// Generalized cross-validation (R's `GCV.S`).
    Gcv,
}
371
/// Result of bandwidth optimization (produced by `optim_bandwidth`).
#[derive(Debug, Clone)]
pub struct OptimBandwidthResult {
    /// Optimal bandwidth found by the grid search.
    pub h_opt: f64,
    /// Criterion that was minimized (CV or GCV).
    pub criterion: CvCriterion,
    /// Criterion value attained at `h_opt`.
    pub value: f64,
}
382
383/// LOO-CV score for a kernel smoother (R's `CV.S`).
384///
385/// Computes the leave-one-out CV score by zeroing the diagonal of the
386/// smoothing matrix, re-normalizing rows, and computing mean squared error.
387///
388/// # Arguments
389/// * `x` — Predictor values
390/// * `y` — Response values
391/// * `bandwidth` — Kernel bandwidth
392/// * `kernel` — Kernel type ("gaussian", "epanechnikov", "tricube")
393///
394/// # Returns
395/// Mean squared LOO prediction error, or `INFINITY` if inputs are invalid.
396pub fn cv_smoother(x: &[f64], y: &[f64], bandwidth: f64, kernel: &str) -> f64 {
397    let n = x.len();
398    if n < 2 || y.len() != n || bandwidth <= 0.0 {
399        return f64::INFINITY;
400    }
401
402    // Get the smoother matrix S
403    let mut s = smoothing_matrix_nw(x, bandwidth, kernel);
404    if s.is_empty() {
405        return f64::INFINITY;
406    }
407
408    // Zero the diagonal → S_cv (LOO smoother)
409    for i in 0..n {
410        s[i + i * n] = 0.0;
411    }
412
413    // Re-normalize each row so it sums to 1
414    for i in 0..n {
415        let row_sum: f64 = (0..n).map(|j| s[i + j * n]).sum();
416        if row_sum > 1e-10 {
417            for j in 0..n {
418                s[i + j * n] /= row_sum;
419            }
420        }
421    }
422
423    // Compute y_hat = S_cv * y, then MSE
424    let mut mse = 0.0;
425    for i in 0..n {
426        let y_hat: f64 = (0..n).map(|j| s[i + j * n] * y[j]).sum();
427        let resid = y[i] - y_hat;
428        mse += resid * resid;
429    }
430    mse / n as f64
431}
432
433/// GCV score for a kernel smoother (R's `GCV.S`).
434///
435/// Computes `(RSS / n) / (1 - tr(S) / n)²`.
436///
437/// # Arguments
438/// * `x` — Predictor values
439/// * `y` — Response values
440/// * `bandwidth` — Kernel bandwidth
441/// * `kernel` — Kernel type
442///
443/// # Returns
444/// GCV score, or `INFINITY` if inputs are invalid or denominator is near zero.
445pub fn gcv_smoother(x: &[f64], y: &[f64], bandwidth: f64, kernel: &str) -> f64 {
446    let n = x.len();
447    if n < 2 || y.len() != n || bandwidth <= 0.0 {
448        return f64::INFINITY;
449    }
450
451    let s = smoothing_matrix_nw(x, bandwidth, kernel);
452    if s.is_empty() {
453        return f64::INFINITY;
454    }
455
456    // y_hat = S * y
457    let mut rss = 0.0;
458    for i in 0..n {
459        let y_hat: f64 = (0..n).map(|j| s[i + j * n] * y[j]).sum();
460        let resid = y[i] - y_hat;
461        rss += resid * resid;
462    }
463
464    // trace(S) = sum of diagonal
465    let trace_s: f64 = (0..n).map(|i| s[i + i * n]).sum();
466
467    let denom = 1.0 - trace_s / n as f64;
468    if denom.abs() < 1e-10 {
469        f64::INFINITY
470    } else {
471        (rss / n as f64) / (denom * denom)
472    }
473}
474
475/// Bandwidth optimizer for kernel smoothers (R's `optim.np`).
476///
477/// Grid search over evenly-spaced bandwidths, selecting the one that
478/// minimizes the specified criterion (CV or GCV).
479///
480/// # Arguments
481/// * `x` — Predictor values
482/// * `y` — Response values
483/// * `h_range` — Optional `(h_min, h_max)`. Defaults to `(h_default / 5, h_default * 5)`
484///   where `h_default = (x_max - x_min) / n^0.2`.
485/// * `criterion` — CV or GCV
486/// * `kernel` — Kernel type
487/// * `n_grid` — Number of grid points (default: 50)
488pub fn optim_bandwidth(
489    x: &[f64],
490    y: &[f64],
491    h_range: Option<(f64, f64)>,
492    criterion: CvCriterion,
493    kernel: &str,
494    n_grid: usize,
495) -> OptimBandwidthResult {
496    let n = x.len();
497    let n_grid = n_grid.max(2);
498
499    // Determine search range
500    let (h_min, h_max) = match h_range {
501        Some((lo, hi)) if lo > 0.0 && hi > lo => (lo, hi),
502        _ => {
503            let x_min = x.iter().copied().fold(f64::INFINITY, f64::min);
504            let x_max = x.iter().copied().fold(f64::NEG_INFINITY, f64::max);
505            let h_default = (x_max - x_min) / (n as f64).powf(0.2);
506            let h_default = h_default.max(1e-10);
507            (h_default / 5.0, h_default * 5.0)
508        }
509    };
510
511    let score_fn = match criterion {
512        CvCriterion::Cv => cv_smoother,
513        CvCriterion::Gcv => gcv_smoother,
514    };
515
516    let mut best_h = h_min;
517    let mut best_score = f64::INFINITY;
518
519    for i in 0..n_grid {
520        let h = h_min + (h_max - h_min) * i as f64 / (n_grid - 1) as f64;
521        let score = score_fn(x, y, h, kernel);
522        if score < best_score {
523            best_score = score;
524            best_h = h;
525        }
526    }
527
528    OptimBandwidthResult {
529        h_opt: best_h,
530        criterion,
531        value: best_score,
532    }
533}
534
535// ─── kNN CV Functions ───────────────────────────────────────────────────────
536
/// Result of kNN k-selection by cross-validation (produced by `knn_gcv`).
#[derive(Debug, Clone)]
pub struct KnnCvResult {
    /// Optimal k (number of neighbors), minimizing the CV error.
    pub optimal_k: usize,
    /// CV error for each k tested (index 0 = k=1).
    pub cv_errors: Vec<f64>,
}
545
546/// Global LOO-CV for kNN k selection (R's `knn.gcv`).
547///
548/// For each candidate k, computes LOO prediction error using a
549/// kernel-weighted kNN estimator with Epanechnikov kernel.
550///
551/// # Arguments
552/// * `x` — Predictor values
553/// * `y` — Response values
554/// * `max_k` — Maximum k to test (tests k = 1, 2, …, max_k)
555pub fn knn_gcv(x: &[f64], y: &[f64], max_k: usize) -> KnnCvResult {
556    let n = x.len();
557    let max_k = max_k.min(n.saturating_sub(1)).max(1);
558
559    // Precompute sorted distances from each point to all others
560    let mut sorted_neighbors: Vec<Vec<(usize, f64)>> = Vec::with_capacity(n);
561    for i in 0..n {
562        let mut dists: Vec<(usize, f64)> = (0..n)
563            .filter(|&j| j != i)
564            .map(|j| (j, (x[j] - x[i]).abs()))
565            .collect();
566        dists.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
567        sorted_neighbors.push(dists);
568    }
569
570    let mut cv_errors = Vec::with_capacity(max_k);
571
572    for k in 1..=max_k {
573        let mut mse = 0.0;
574        for i in 0..n {
575            let neighbors = &sorted_neighbors[i];
576            // Bandwidth: midpoint between k-th and (k+1)-th NN distances
577            let d_k = if k <= neighbors.len() {
578                neighbors[k - 1].1
579            } else {
580                neighbors.last().map_or(1.0, |x| x.1)
581            };
582            let d_k1 = if k < neighbors.len() {
583                neighbors[k].1
584            } else {
585                d_k * 2.0
586            };
587            let h = (d_k + d_k1) / 2.0;
588            let h = h.max(1e-10);
589
590            // Epanechnikov kernel weighted prediction
591            let mut num = 0.0;
592            let mut den = 0.0;
593            for &(j, dist) in neighbors.iter().take(k) {
594                let u = dist / h;
595                let w = epanechnikov_kernel(u);
596                num += w * y[j];
597                den += w;
598            }
599            let y_hat = if den > 1e-10 { num / den } else { y[i] };
600            mse += (y[i] - y_hat).powi(2);
601        }
602        cv_errors.push(mse / n as f64);
603    }
604
605    let optimal_k = cv_errors
606        .iter()
607        .enumerate()
608        .min_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
609        .map_or(1, |(i, _)| i + 1);
610
611    KnnCvResult {
612        optimal_k,
613        cv_errors,
614    }
615}
616
/// Local (per-observation) LOO-CV for kNN k selection (R's `knn.lcv`).
///
/// For each observation, independently selects the best k by minimizing
/// the absolute LOO prediction error at that point, where the prediction
/// is the plain average of the k nearest other responses.
///
/// # Arguments
/// * `x` — Predictor values
/// * `y` — Response values
/// * `max_k` — Maximum k to test
///
/// # Returns
/// Vector of per-observation optimal k values (length n). Mismatched
/// input lengths yield all-1s instead of panicking on indexing.
pub fn knn_lcv(x: &[f64], y: &[f64], max_k: usize) -> Vec<usize> {
    let n = x.len();
    // Guard: a shorter `y` would panic when indexed by a neighbor of x.
    if y.len() != n {
        return vec![1; n];
    }
    let max_k = max_k.min(n.saturating_sub(1)).max(1);

    let mut per_obs_k = vec![1usize; n];

    for i in 0..n {
        // Sort the other observations by distance to x[i]; the stable
        // sort keeps ascending index order on distance ties.
        let mut neighbors: Vec<(usize, f64)> = (0..n)
            .filter(|&j| j != i)
            .map(|j| (j, (x[j] - x[i]).abs()))
            .collect();
        neighbors.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));

        let mut best_k = 1;
        let mut best_err = f64::INFINITY;

        // Running sum of the k nearest responses: O(max_k) total work
        // instead of recomputing each prefix sum from scratch (O(max_k²)),
        // with identical addition order and therefore identical floats.
        let mut running_sum = 0.0;
        for k in 1..=max_k {
            if let Some(&(j, _)) = neighbors.get(k - 1) {
                running_sum += y[j];
            }
            let y_hat = running_sum / k as f64;
            let err = (y[i] - y_hat).abs();
            if err < best_err {
                best_err = err;
                best_k = k;
            }
        }
        per_obs_k[i] = best_k;
    }

    per_obs_k
}
661
662#[cfg(test)]
663mod tests {
664    use super::*;
665
666    fn uniform_grid(n: usize) -> Vec<f64> {
667        (0..n).map(|i| i as f64 / (n - 1) as f64).collect()
668    }
669
670    // ============== Nadaraya-Watson tests ==============
671
672    #[test]
673    fn test_nw_constant_data() {
674        let x = uniform_grid(20);
675        let y: Vec<f64> = vec![5.0; 20];
676
677        let y_smooth = nadaraya_watson(&x, &y, &x, 0.1, "gaussian");
678
679        // Smoothing constant data should return constant
680        for &yi in &y_smooth {
681            assert!(
682                (yi - 5.0).abs() < 0.1,
683                "Constant data should remain constant"
684            );
685        }
686    }
687
688    #[test]
689    fn test_nw_linear_data() {
690        let x = uniform_grid(50);
691        let y: Vec<f64> = x.iter().map(|&xi| 2.0 * xi + 1.0).collect();
692
693        let y_smooth = nadaraya_watson(&x, &y, &x, 0.2, "gaussian");
694
695        // Linear data should be approximately preserved (with some edge effects)
696        for i in 10..40 {
697            let expected = 2.0 * x[i] + 1.0;
698            assert!(
699                (y_smooth[i] - expected).abs() < 0.2,
700                "Linear trend should be approximately preserved"
701            );
702        }
703    }
704
705    #[test]
706    fn test_nw_gaussian_vs_epanechnikov() {
707        let x = uniform_grid(30);
708        let y: Vec<f64> = x
709            .iter()
710            .map(|&xi| (2.0 * std::f64::consts::PI * xi).sin())
711            .collect();
712
713        let y_gauss = nadaraya_watson(&x, &y, &x, 0.1, "gaussian");
714        let y_epan = nadaraya_watson(&x, &y, &x, 0.1, "epanechnikov");
715
716        // Both should produce valid output
717        assert_eq!(y_gauss.len(), 30);
718        assert_eq!(y_epan.len(), 30);
719
720        // They should be different (different kernels)
721        let diff: f64 = y_gauss
722            .iter()
723            .zip(&y_epan)
724            .map(|(a, b)| (a - b).abs())
725            .sum();
726        assert!(
727            diff > 0.0,
728            "Different kernels should give different results"
729        );
730    }
731
732    #[test]
733    fn test_nw_invalid_input() {
734        // Empty input
735        let result = nadaraya_watson(&[], &[], &[0.5], 0.1, "gaussian");
736        assert_eq!(result, vec![0.0]);
737
738        // Mismatched lengths
739        let result = nadaraya_watson(&[0.0, 1.0], &[1.0], &[0.5], 0.1, "gaussian");
740        assert_eq!(result, vec![0.0]);
741
742        // Zero bandwidth
743        let result = nadaraya_watson(&[0.0, 1.0], &[1.0, 2.0], &[0.5], 0.0, "gaussian");
744        assert_eq!(result, vec![0.0]);
745    }
746
747    // ============== Local linear tests ==============
748
749    #[test]
750    fn test_ll_constant_data() {
751        let x = uniform_grid(20);
752        let y: Vec<f64> = vec![3.0; 20];
753
754        let y_smooth = local_linear(&x, &y, &x, 0.15, "gaussian");
755
756        for &yi in &y_smooth {
757            assert!((yi - 3.0).abs() < 0.1, "Constant should remain constant");
758        }
759    }
760
761    #[test]
762    fn test_ll_linear_data_exact() {
763        let x = uniform_grid(30);
764        let y: Vec<f64> = x.iter().map(|&xi| 3.0 * xi + 2.0).collect();
765
766        let y_smooth = local_linear(&x, &y, &x, 0.2, "gaussian");
767
768        // Local linear should fit linear data exactly (in interior)
769        for i in 5..25 {
770            let expected = 3.0 * x[i] + 2.0;
771            assert!(
772                (y_smooth[i] - expected).abs() < 0.1,
773                "Local linear should fit linear data well"
774            );
775        }
776    }
777
778    #[test]
779    fn test_ll_invalid_input() {
780        let result = local_linear(&[], &[], &[0.5], 0.1, "gaussian");
781        assert_eq!(result, vec![0.0]);
782
783        let result = local_linear(&[0.0, 1.0], &[1.0, 2.0], &[0.5], -0.1, "gaussian");
784        assert_eq!(result, vec![0.0]);
785    }
786
787    // ============== Local polynomial tests ==============
788
789    #[test]
790    fn test_lp_degree1_equals_local_linear() {
791        let x = uniform_grid(25);
792        let y: Vec<f64> = x.iter().map(|&xi| xi * xi).collect();
793
794        let y_ll = local_linear(&x, &y, &x, 0.15, "gaussian");
795        let y_lp = local_polynomial(&x, &y, &x, 0.15, 1, "gaussian");
796
797        for i in 0..25 {
798            assert!(
799                (y_ll[i] - y_lp[i]).abs() < 1e-10,
800                "Degree 1 should equal local linear"
801            );
802        }
803    }
804
805    #[test]
806    fn test_lp_quadratic_data() {
807        let x = uniform_grid(40);
808        let y: Vec<f64> = x.iter().map(|&xi| xi * xi).collect();
809
810        let y_smooth = local_polynomial(&x, &y, &x, 0.15, 2, "gaussian");
811
812        // Local quadratic should fit quadratic data well in interior
813        for i in 8..32 {
814            let expected = x[i] * x[i];
815            assert!(
816                (y_smooth[i] - expected).abs() < 0.1,
817                "Local quadratic should fit quadratic data"
818            );
819        }
820    }
821
822    #[test]
823    fn test_lp_invalid_input() {
824        // Zero degree delegates to Nadaraya-Watson (not zeros)
825        let result = local_polynomial(&[0.0, 1.0], &[1.0, 2.0], &[0.5], 0.1, 0, "gaussian");
826        let nw = nadaraya_watson(&[0.0, 1.0], &[1.0, 2.0], &[0.5], 0.1, "gaussian");
827        assert_eq!(result, nw);
828
829        // Empty input
830        let result = local_polynomial(&[], &[], &[0.5], 0.1, 2, "gaussian");
831        assert_eq!(result, vec![0.0]);
832    }
833
834    // ============== KNN smoother tests ==============
835
836    #[test]
837    fn test_knn_k1_nearest() {
838        let x = vec![0.0, 0.5, 1.0];
839        let y = vec![1.0, 2.0, 3.0];
840
841        let result = knn_smoother(&x, &y, &[0.1, 0.6, 0.9], 1);
842
843        // k=1 should return the nearest neighbor's y value
844        assert!((result[0] - 1.0).abs() < 1e-10, "0.1 nearest to 0.0 -> 1.0");
845        assert!((result[1] - 2.0).abs() < 1e-10, "0.6 nearest to 0.5 -> 2.0");
846        assert!((result[2] - 3.0).abs() < 1e-10, "0.9 nearest to 1.0 -> 3.0");
847    }
848
849    #[test]
850    fn test_knn_k_equals_n_is_mean() {
851        let x = vec![0.0, 0.25, 0.5, 0.75, 1.0];
852        let y = vec![1.0, 2.0, 3.0, 4.0, 5.0];
853        let expected_mean = 3.0;
854
855        let result = knn_smoother(&x, &y, &[0.5], 5);
856
857        assert!(
858            (result[0] - expected_mean).abs() < 1e-10,
859            "k=n should return mean"
860        );
861    }
862
863    #[test]
864    fn test_knn_invalid_input() {
865        let result = knn_smoother(&[], &[], &[0.5], 3);
866        assert_eq!(result, vec![0.0]);
867
868        let result = knn_smoother(&[0.0, 1.0], &[1.0, 2.0], &[0.5], 0);
869        assert_eq!(result, vec![0.0]);
870    }
871
872    // ============== Smoothing matrix tests ==============
873
874    #[test]
875    fn test_smoothing_matrix_row_stochastic() {
876        let x = uniform_grid(10);
877        let s = smoothing_matrix_nw(&x, 0.2, "gaussian");
878
879        assert_eq!(s.len(), 100);
880
881        // Each row should sum to 1 (row stochastic)
882        for i in 0..10 {
883            let row_sum: f64 = (0..10).map(|j| s[i + j * 10]).sum();
884            assert!(
885                (row_sum - 1.0).abs() < 1e-10,
886                "Row {} should sum to 1, got {}",
887                i,
888                row_sum
889            );
890        }
891    }
892
893    #[test]
894    fn test_smoothing_matrix_invalid_input() {
895        let result = smoothing_matrix_nw(&[], 0.1, "gaussian");
896        assert!(result.is_empty());
897
898        let result = smoothing_matrix_nw(&[0.0, 1.0], 0.0, "gaussian");
899        assert!(result.is_empty());
900    }
901
902    #[test]
903    fn test_nan_nw_no_panic() {
904        let x = vec![0.0, 0.25, 0.5, 0.75, 1.0];
905        let mut y = vec![0.0, 1.0, 2.0, 1.0, 0.0];
906        y[2] = f64::NAN;
907        let result = nadaraya_watson(&x, &y, &x, 0.3, "gaussian");
908        assert_eq!(result.len(), x.len());
909        // NaN should propagate but not panic
910    }
911
912    #[test]
913    fn test_n1_smoother() {
914        // Single data point
915        let x = vec![0.5];
916        let y = vec![3.0];
917        let x_new = vec![0.5];
918        let result = nadaraya_watson(&x, &y, &x_new, 0.3, "gaussian");
919        assert_eq!(result.len(), 1);
920        assert!(
921            (result[0] - 3.0).abs() < 1e-6,
922            "Single point smoother should return the value"
923        );
924    }
925
926    #[test]
927    fn test_duplicate_x_smoother() {
928        // Duplicate x values
929        let x = vec![0.0, 0.0, 0.5, 1.0, 1.0];
930        let y = vec![1.0, 2.0, 3.0, 4.0, 5.0];
931        let x_new = vec![0.0, 0.5, 1.0];
932        let result = nadaraya_watson(&x, &y, &x_new, 0.3, "gaussian");
933        assert_eq!(result.len(), 3);
934        for v in &result {
935            assert!(v.is_finite());
936        }
937    }
938
939    // ============== CV smoother tests ==============
940
941    #[test]
942    fn test_cv_smoother_linear_data() {
943        let x = uniform_grid(30);
944        let y: Vec<f64> = x.iter().map(|&xi| 2.0 * xi + 1.0).collect();
945        let cv = cv_smoother(&x, &y, 0.2, "gaussian");
946        assert!(cv.is_finite());
947        assert!(cv >= 0.0);
948        assert!(cv < 1.0, "CV error for smooth linear data should be small");
949    }
950
951    #[test]
952    fn test_cv_smoother_invalid() {
953        assert_eq!(cv_smoother(&[], &[], 0.1, "gaussian"), f64::INFINITY);
954        assert_eq!(
955            cv_smoother(&[0.0, 1.0], &[1.0, 2.0], -0.1, "gaussian"),
956            f64::INFINITY
957        );
958    }
959
960    #[test]
961    fn test_gcv_smoother_linear_data() {
962        let x = uniform_grid(30);
963        let y: Vec<f64> = x.iter().map(|&xi| 2.0 * xi + 1.0).collect();
964        let gcv = gcv_smoother(&x, &y, 0.2, "gaussian");
965        assert!(gcv.is_finite());
966        assert!(gcv >= 0.0);
967    }
968
969    #[test]
970    fn test_gcv_smoother_invalid() {
971        assert_eq!(gcv_smoother(&[], &[], 0.1, "gaussian"), f64::INFINITY);
972    }
973
974    #[test]
975    fn test_optim_bandwidth_returns_valid() {
976        let x = uniform_grid(30);
977        let y: Vec<f64> = x
978            .iter()
979            .map(|&xi| (2.0 * std::f64::consts::PI * xi).sin())
980            .collect();
981
982        let result = optim_bandwidth(&x, &y, None, CvCriterion::Gcv, "gaussian", 20);
983        assert!(result.h_opt > 0.0);
984        assert!(result.value.is_finite());
985        assert_eq!(result.criterion, CvCriterion::Gcv);
986    }
987
988    #[test]
989    fn test_optim_bandwidth_cv_vs_gcv() {
990        let x = uniform_grid(25);
991        let y: Vec<f64> = x.iter().map(|&xi| xi * xi).collect();
992
993        let cv_result = optim_bandwidth(&x, &y, None, CvCriterion::Cv, "gaussian", 20);
994        let gcv_result = optim_bandwidth(&x, &y, None, CvCriterion::Gcv, "gaussian", 20);
995
996        assert!(cv_result.h_opt > 0.0);
997        assert!(gcv_result.h_opt > 0.0);
998    }
999
1000    #[test]
1001    fn test_optim_bandwidth_custom_range() {
1002        let x = uniform_grid(20);
1003        let y: Vec<f64> = x.to_vec();
1004        let result = optim_bandwidth(&x, &y, Some((0.05, 0.5)), CvCriterion::Cv, "gaussian", 10);
1005        assert!(result.h_opt >= 0.05);
1006        assert!(result.h_opt <= 0.5);
1007    }
1008
1009    // ============== kNN CV tests ==============
1010
1011    #[test]
1012    fn test_knn_gcv_returns_valid() {
1013        let x = uniform_grid(20);
1014        let y: Vec<f64> = x.iter().map(|&xi| xi * xi).collect();
1015
1016        let result = knn_gcv(&x, &y, 10);
1017        assert!(result.optimal_k >= 1);
1018        assert!(result.optimal_k <= 10);
1019        assert_eq!(result.cv_errors.len(), 10);
1020        for &e in &result.cv_errors {
1021            assert!(e.is_finite());
1022            assert!(e >= 0.0);
1023        }
1024    }
1025
1026    #[test]
1027    fn test_knn_gcv_constant_data() {
1028        let x = uniform_grid(15);
1029        let y = vec![5.0; 15];
1030        let result = knn_gcv(&x, &y, 5);
1031        // For constant data, error should be near zero for any k
1032        for &e in &result.cv_errors {
1033            assert!(e < 0.01, "Constant data: CV error should be near zero");
1034        }
1035    }
1036
1037    #[test]
1038    fn test_knn_lcv_returns_valid() {
1039        let x = uniform_grid(15);
1040        let y: Vec<f64> = x.to_vec();
1041
1042        let result = knn_lcv(&x, &y, 5);
1043        assert_eq!(result.len(), 15);
1044        for &k in &result {
1045            assert!(k >= 1);
1046            assert!(k <= 5);
1047        }
1048    }
1049
1050    #[test]
1051    fn test_knn_lcv_constant_data() {
1052        let x = uniform_grid(10);
1053        let y = vec![3.0; 10];
1054        let result = knn_lcv(&x, &y, 5);
1055        assert_eq!(result.len(), 10);
1056        // For constant data, all k values should give zero error
1057        // so k=1 is optimal (first tested)
1058        for &k in &result {
1059            assert!(k >= 1);
1060        }
1061    }
1062
1063    // ============== Tricube kernel tests ==============
1064
1065    #[test]
1066    fn test_tricube_kernel_values() {
1067        // At u=0, tricube should be 1.0
1068        let k0 = tricube_kernel(0.0);
1069        assert!((k0 - 1.0).abs() < 1e-10, "tricube(0) should be 1.0");
1070
1071        // At |u| >= 1, tricube should be 0.0
1072        assert_eq!(tricube_kernel(1.0), 0.0, "tricube(1) should be 0");
1073        assert_eq!(tricube_kernel(-1.0), 0.0, "tricube(-1) should be 0");
1074        assert_eq!(tricube_kernel(2.0), 0.0, "tricube(2) should be 0");
1075
1076        // At u=0.5, should be positive and less than 1
1077        let k05 = tricube_kernel(0.5);
1078        assert!(k05 > 0.0 && k05 < 1.0, "tricube(0.5) should be in (0, 1)");
1079    }
1080
1081    #[test]
1082    fn test_nw_tricube_constant_data() {
1083        let x = uniform_grid(20);
1084        let y = vec![5.0; 20];
1085
1086        let y_smooth = nadaraya_watson(&x, &y, &x, 0.2, "tricube");
1087
1088        for &yi in &y_smooth {
1089            assert!(
1090                (yi - 5.0).abs() < 0.1,
1091                "Tricube NW: constant data should remain constant"
1092            );
1093        }
1094    }
1095
1096    #[test]
1097    fn test_nw_tricube_linear_data() {
1098        let x = uniform_grid(50);
1099        let y: Vec<f64> = x.iter().map(|&xi| 2.0 * xi + 1.0).collect();
1100
1101        let y_smooth = nadaraya_watson(&x, &y, &x, 0.2, "tricube");
1102
1103        // Interior points should be approximately correct
1104        for i in 10..40 {
1105            let expected = 2.0 * x[i] + 1.0;
1106            assert!(
1107                (y_smooth[i] - expected).abs() < 0.3,
1108                "Tricube NW: linear trend should be approximately preserved at i={i}"
1109            );
1110        }
1111    }
1112
1113    #[test]
1114    fn test_nw_tricube_vs_gaussian() {
1115        let x = uniform_grid(30);
1116        let y: Vec<f64> = x
1117            .iter()
1118            .map(|&xi| (2.0 * std::f64::consts::PI * xi).sin())
1119            .collect();
1120
1121        let y_gauss = nadaraya_watson(&x, &y, &x, 0.15, "gaussian");
1122        let y_tri = nadaraya_watson(&x, &y, &x, 0.15, "tricube");
1123
1124        assert_eq!(y_gauss.len(), y_tri.len());
1125
1126        // Both should produce valid output
1127        assert!(y_tri.iter().all(|v| v.is_finite()));
1128
1129        // They should be different
1130        let diff: f64 = y_gauss.iter().zip(&y_tri).map(|(a, b)| (a - b).abs()).sum();
1131        assert!(
1132            diff > 0.0,
1133            "Gaussian and tricube kernels should give different results"
1134        );
1135    }
1136
1137    #[test]
1138    fn test_nw_tricube_vs_epanechnikov() {
1139        let x = uniform_grid(30);
1140        let y: Vec<f64> = x
1141            .iter()
1142            .map(|&xi| (2.0 * std::f64::consts::PI * xi).sin())
1143            .collect();
1144
1145        let y_epan = nadaraya_watson(&x, &y, &x, 0.15, "epanechnikov");
1146        let y_tri = nadaraya_watson(&x, &y, &x, 0.15, "tricube");
1147
1148        // Both compact support kernels should produce finite output
1149        assert!(y_epan.iter().all(|v| v.is_finite()));
1150        assert!(y_tri.iter().all(|v| v.is_finite()));
1151
1152        // Should differ since kernel shapes are different
1153        let diff: f64 = y_epan.iter().zip(&y_tri).map(|(a, b)| (a - b).abs()).sum();
1154        assert!(
1155            diff > 0.0,
1156            "Epanechnikov and tricube should give different results"
1157        );
1158    }
1159
1160    #[test]
1161    fn test_ll_tricube_constant_data() {
1162        let x = uniform_grid(20);
1163        let y = vec![3.0; 20];
1164
1165        let y_smooth = local_linear(&x, &y, &x, 0.2, "tricube");
1166
1167        for &yi in &y_smooth {
1168            assert!(
1169                (yi - 3.0).abs() < 0.1,
1170                "Tricube LL: constant should remain constant"
1171            );
1172        }
1173    }
1174
1175    #[test]
1176    fn test_ll_tricube_linear_data() {
1177        let x = uniform_grid(30);
1178        let y: Vec<f64> = x.iter().map(|&xi| 3.0 * xi + 2.0).collect();
1179
1180        let y_smooth = local_linear(&x, &y, &x, 0.2, "tricube");
1181
1182        // Local linear should fit linear data well in the interior
1183        for i in 5..25 {
1184            let expected = 3.0 * x[i] + 2.0;
1185            assert!(
1186                (y_smooth[i] - expected).abs() < 0.2,
1187                "Tricube LL: should fit linear data well at i={i}"
1188            );
1189        }
1190    }
1191
1192    #[test]
1193    fn test_ll_tricube_vs_gaussian() {
1194        let x = uniform_grid(30);
1195        let y: Vec<f64> = x.iter().map(|&xi| xi * xi).collect();
1196
1197        let y_gauss = local_linear(&x, &y, &x, 0.15, "gaussian");
1198        let y_tri = local_linear(&x, &y, &x, 0.15, "tricube");
1199
1200        assert_eq!(y_gauss.len(), y_tri.len());
1201        assert!(y_tri.iter().all(|v| v.is_finite()));
1202
1203        let diff: f64 = y_gauss.iter().zip(&y_tri).map(|(a, b)| (a - b).abs()).sum();
1204        assert!(
1205            diff > 0.0,
1206            "Gaussian and tricube local linear should differ"
1207        );
1208    }
1209
1210    #[test]
1211    fn test_lp_tricube_quadratic() {
1212        let x = uniform_grid(40);
1213        let y: Vec<f64> = x.iter().map(|&xi| xi * xi).collect();
1214
1215        let y_smooth = local_polynomial(&x, &y, &x, 0.15, 2, "tricube");
1216
1217        // Local quadratic with tricube should fit well in interior
1218        for i in 8..32 {
1219            let expected = x[i] * x[i];
1220            assert!(
1221                (y_smooth[i] - expected).abs() < 0.15,
1222                "Tricube LP: should fit quadratic data at i={i}"
1223            );
1224        }
1225    }
1226
1227    #[test]
1228    fn test_get_kernel_tricube_aliases() {
1229        // "tricube" and "tri-cube" should both resolve to the tricube kernel
1230        let k1 = get_kernel("tricube");
1231        let k2 = get_kernel("tri-cube");
1232
1233        let test_val = 0.5;
1234        assert!(
1235            (k1(test_val) - k2(test_val)).abs() < 1e-15,
1236            "Both tricube aliases should give the same result"
1237        );
1238    }
1239
1240    #[test]
1241    fn test_smoothing_matrix_tricube() {
1242        let x = uniform_grid(10);
1243        let s = smoothing_matrix_nw(&x, 0.2, "tricube");
1244
1245        assert_eq!(s.len(), 100);
1246
1247        // Each row should sum to 1 (row stochastic)
1248        for i in 0..10 {
1249            let row_sum: f64 = (0..10).map(|j| s[i + j * 10]).sum();
1250            assert!(
1251                (row_sum - 1.0).abs() < 1e-10,
1252                "Tricube: row {} should sum to 1, got {}",
1253                i,
1254                row_sum
1255            );
1256        }
1257    }
1258
1259    #[test]
1260    fn test_cv_smoother_tricube() {
1261        let x = uniform_grid(30);
1262        let y: Vec<f64> = x.iter().map(|&xi| 2.0 * xi + 1.0).collect();
1263        let cv = cv_smoother(&x, &y, 0.2, "tricube");
1264        assert!(cv.is_finite());
1265        assert!(cv >= 0.0);
1266    }
1267
1268    #[test]
1269    fn test_optim_bandwidth_tricube() {
1270        let x = uniform_grid(25);
1271        let y: Vec<f64> = x
1272            .iter()
1273            .map(|&xi| (2.0 * std::f64::consts::PI * xi).sin())
1274            .collect();
1275
1276        let result = optim_bandwidth(&x, &y, None, CvCriterion::Gcv, "tricube", 20);
1277        assert!(result.h_opt > 0.0);
1278        assert!(result.value.is_finite());
1279    }
1280}