//! Smoothing functions for functional data.
//!
//! This module provides kernel-based smoothing methods including
//! Nadaraya-Watson, local linear, and local polynomial regression.

use crate::slice_maybe_parallel;
#[cfg(feature = "parallel")]
use rayon::iter::ParallelIterator;
/// Standard Gaussian (normal) kernel: φ(u) = exp(−u²/2) / √(2π).
fn gaussian_kernel(u: f64) -> f64 {
    let norm = (2.0 * std::f64::consts::PI).sqrt();
    f64::exp(-0.5 * u * u) / norm
}
14
/// Epanechnikov kernel: K(u) = 0.75·(1 − u²) for |u| ≤ 1, else 0.
fn epanechnikov_kernel(u: f64) -> f64 {
    match u.abs() {
        a if a <= 1.0 => 0.75 * (1.0 - u * u),
        _ => 0.0,
    }
}
23
/// Tri-cube kernel (used by R's loess()): K(u) = (1 − |u|³)³ for |u| < 1, else 0.
fn tricube_kernel(u: f64) -> f64 {
    let a = u.abs();
    if a >= 1.0 {
        return 0.0;
    }
    let t = 1.0 - a.powi(3);
    t.powi(3)
}
33
34/// Get kernel function by name.
35fn get_kernel(kernel_type: &str) -> fn(f64) -> f64 {
36    match kernel_type.to_lowercase().as_str() {
37        "epanechnikov" | "epan" => epanechnikov_kernel,
38        "tricube" | "tri-cube" => tricube_kernel,
39        _ => gaussian_kernel,
40    }
41}
42
43/// Nadaraya-Watson kernel smoother.
44///
45/// # Arguments
46/// * `x` - Predictor values
47/// * `y` - Response values
48/// * `x_new` - Points at which to evaluate the smoother
49/// * `bandwidth` - Kernel bandwidth
50/// * `kernel` - Kernel type ("gaussian" or "epanechnikov")
51///
52/// # Returns
53/// Smoothed values at x_new
54pub fn nadaraya_watson(
55    x: &[f64],
56    y: &[f64],
57    x_new: &[f64],
58    bandwidth: f64,
59    kernel: &str,
60) -> Vec<f64> {
61    let n = x.len();
62    if n == 0 || y.len() != n || x_new.is_empty() || bandwidth <= 0.0 {
63        return vec![0.0; x_new.len()];
64    }
65
66    let kernel_fn = get_kernel(kernel);
67
68    slice_maybe_parallel!(x_new)
69        .map(|&x0| {
70            let mut num = 0.0;
71            let mut denom = 0.0;
72
73            for i in 0..n {
74                let u = (x[i] - x0) / bandwidth;
75                let w = kernel_fn(u);
76                num += w * y[i];
77                denom += w;
78            }
79
80            if denom > 1e-10 {
81                num / denom
82            } else {
83                0.0
84            }
85        })
86        .collect()
87}
88
89/// Local linear regression smoother.
90///
91/// # Arguments
92/// * `x` - Predictor values
93/// * `y` - Response values
94/// * `x_new` - Points at which to evaluate the smoother
95/// * `bandwidth` - Kernel bandwidth
96/// * `kernel` - Kernel type
97///
98/// # Returns
99/// Smoothed values at x_new
100pub fn local_linear(x: &[f64], y: &[f64], x_new: &[f64], bandwidth: f64, kernel: &str) -> Vec<f64> {
101    let n = x.len();
102    if n == 0 || y.len() != n || x_new.is_empty() || bandwidth <= 0.0 {
103        return vec![0.0; x_new.len()];
104    }
105
106    let kernel_fn = get_kernel(kernel);
107
108    slice_maybe_parallel!(x_new)
109        .map(|&x0| {
110            // Compute weighted moments
111            let mut s0 = 0.0;
112            let mut s1 = 0.0;
113            let mut s2 = 0.0;
114            let mut t0 = 0.0;
115            let mut t1 = 0.0;
116
117            for i in 0..n {
118                let u = (x[i] - x0) / bandwidth;
119                let w = kernel_fn(u);
120                let d = x[i] - x0;
121
122                s0 += w;
123                s1 += w * d;
124                s2 += w * d * d;
125                t0 += w * y[i];
126                t1 += w * y[i] * d;
127            }
128
129            // Solve local linear regression
130            let det = s0 * s2 - s1 * s1;
131            if det.abs() > 1e-10 {
132                (s2 * t0 - s1 * t1) / det
133            } else if s0 > 1e-10 {
134                t0 / s0
135            } else {
136                0.0
137            }
138        })
139        .collect()
140}
141
/// Accumulate the weighted normal equations (X'WX and X'Wy) for a local
/// polynomial fit of `p` coefficients centered at `x0`.
///
/// Returns `(xtx, xty)` where `xtx` is a row-major p×p matrix with
/// `xtx[j*p + k] = Σ w·dʲ·dᵏ` and `xty[j] = Σ w·y·dʲ`, with d = xᵢ − x0
/// and w = kernel((xᵢ − x0) / bandwidth).
fn accumulate_weighted_normal_equations(
    x: &[f64],
    y: &[f64],
    x0: f64,
    bandwidth: f64,
    p: usize,
    kernel_fn: impl Fn(f64) -> f64,
) -> (Vec<f64>, Vec<f64>) {
    let mut xtx = vec![0.0; p * p];
    let mut xty = vec![0.0; p];

    for (&xi, &yi) in x.iter().zip(y.iter()) {
        let d = xi - x0;
        let w = kernel_fn(d / bandwidth);

        for j in 0..p {
            let w_dj = w * d.powi(j as i32);
            for k in 0..p {
                xtx[j * p + k] += w_dj * d.powi(k as i32);
            }
            xty[j] += w_dj * yi;
        }
    }

    (xtx, xty)
}
171
/// Find the pivot row for column `col`: the row (at or below the
/// diagonal) whose entry in that column has the largest absolute value.
///
/// `a` is a row-major p×p matrix. Ties keep the earliest row, matching
/// partial-pivoting convention.
///
/// (Removed two stale doc comments left over from an earlier refactor
/// that described the full solver rather than this helper.)
fn find_pivot(a: &[f64], p: usize, col: usize) -> usize {
    let mut max_idx = col;
    for j in (col + 1)..p {
        if a[j * p + col].abs() > a[max_idx * p + col].abs() {
            max_idx = j;
        }
    }
    max_idx
}
185
/// Swap rows `row_a` and `row_b` in the row-major p×p matrix `a` and in
/// the right-hand-side vector `b`.
fn swap_rows(a: &mut [f64], b: &mut [f64], p: usize, row_a: usize, row_b: usize) {
    let (base_a, base_b) = (row_a * p, row_b * p);
    for k in 0..p {
        a.swap(base_a + k, base_b + k);
    }
    b.swap(row_a, row_b);
}
193
/// Eliminate the entries below the diagonal in column `pivot_row` by
/// subtracting a scaled copy of the pivot row from every row beneath it
/// (applying the same operation to the RHS vector `b`).
///
/// The caller must ensure the pivot entry is nonzero.
fn eliminate_below(a: &mut [f64], b: &mut [f64], p: usize, pivot_row: usize) {
    let pivot = a[pivot_row * p + pivot_row];
    for row in (pivot_row + 1)..p {
        let factor = a[row * p + pivot_row] / pivot;
        // Columns left of the pivot are already zero; start at pivot_row.
        for col in pivot_row..p {
            a[row * p + col] -= factor * a[pivot_row * p + col];
        }
        b[row] -= factor * b[pivot_row];
    }
}
205
206fn forward_eliminate(a: &mut [f64], b: &mut [f64], p: usize) {
207    for i in 0..p {
208        let max_idx = find_pivot(a, p, i);
209        if max_idx != i {
210            swap_rows(a, b, p, i, max_idx);
211        }
212
213        if a[i * p + i].abs() < 1e-10 {
214            continue;
215        }
216
217        eliminate_below(a, b, p, i);
218    }
219}
220
/// Back substitution for an upper-triangular system.
///
/// Rows with a numerically zero diagonal leave their coefficient at 0.
fn back_substitute(a: &[f64], b: &[f64], p: usize) -> Vec<f64> {
    let mut coefs = vec![0.0; p];
    for row in (0..p).rev() {
        // b[row] minus the contribution of the already-solved unknowns.
        let residual = ((row + 1)..p)
            .fold(b[row], |acc, col| acc - a[row * p + col] * coefs[col]);
        let diag = a[row * p + row];
        if diag.abs() > 1e-10 {
            coefs[row] = residual / diag;
        }
    }
    coefs
}
235
236fn solve_gaussian(a: &mut [f64], b: &mut [f64], p: usize) -> Vec<f64> {
237    forward_eliminate(a, b, p);
238    back_substitute(a, b, p)
239}
240
241/// Solve a linear system Ax = b via Gaussian elimination with partial pivoting.
242///
243/// Public wrapper for use by other modules (e.g., `fregre_basis_cv`).
244/// `a` is a p×p matrix in row-major order, `b` is the RHS vector of length p.
245/// Both are modified in place.
246pub fn solve_gaussian_pub(a: &mut [f64], b: &mut [f64], p: usize) -> Vec<f64> {
247    solve_gaussian(a, b, p)
248}
249
250/// Local polynomial regression smoother.
251///
252/// # Arguments
253/// * `x` - Predictor values
254/// * `y` - Response values
255/// * `x_new` - Points at which to evaluate the smoother
256/// * `bandwidth` - Kernel bandwidth
257/// * `degree` - Polynomial degree
258/// * `kernel` - Kernel type
259///
260/// # Returns
261/// Smoothed values at x_new
262pub fn local_polynomial(
263    x: &[f64],
264    y: &[f64],
265    x_new: &[f64],
266    bandwidth: f64,
267    degree: usize,
268    kernel: &str,
269) -> Vec<f64> {
270    let n = x.len();
271    if n == 0 || y.len() != n || x_new.is_empty() || bandwidth <= 0.0 || degree == 0 {
272        return vec![0.0; x_new.len()];
273    }
274
275    if degree == 1 {
276        return local_linear(x, y, x_new, bandwidth, kernel);
277    }
278
279    let kernel_fn = get_kernel(kernel);
280    let p = degree + 1; // Number of coefficients
281
282    slice_maybe_parallel!(x_new)
283        .map(|&x0| {
284            let (mut xtx, mut xty) =
285                accumulate_weighted_normal_equations(x, y, x0, bandwidth, p, kernel_fn);
286            let coefs = solve_gaussian(&mut xtx, &mut xty, p);
287            coefs[0]
288        })
289        .collect()
290}
291
292/// k-Nearest Neighbors smoother.
293///
294/// # Arguments
295/// * `x` - Predictor values
296/// * `y` - Response values
297/// * `x_new` - Points at which to evaluate the smoother
298/// * `k` - Number of neighbors
299///
300/// # Returns
301/// Smoothed values at x_new
302pub fn knn_smoother(x: &[f64], y: &[f64], x_new: &[f64], k: usize) -> Vec<f64> {
303    let n = x.len();
304    if n == 0 || y.len() != n || x_new.is_empty() || k == 0 {
305        return vec![0.0; x_new.len()];
306    }
307
308    let k = k.min(n);
309
310    slice_maybe_parallel!(x_new)
311        .map(|&x0| {
312            // Compute distances
313            let mut distances: Vec<(usize, f64)> = x
314                .iter()
315                .enumerate()
316                .map(|(i, &xi)| (i, (xi - x0).abs()))
317                .collect();
318
319            // Partial sort to get k nearest
320            distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
321
322            // Average of k nearest neighbors
323            let sum: f64 = distances.iter().take(k).map(|(i, _)| y[*i]).sum();
324            sum / k as f64
325        })
326        .collect()
327}
328
329/// Compute smoothing matrix for Nadaraya-Watson.
330///
331/// Returns the smoother matrix S such that y_hat = S * y.
332pub fn smoothing_matrix_nw(x: &[f64], bandwidth: f64, kernel: &str) -> Vec<f64> {
333    let n = x.len();
334    if n == 0 || bandwidth <= 0.0 {
335        return Vec::new();
336    }
337
338    let kernel_fn = get_kernel(kernel);
339    let mut s = vec![0.0; n * n];
340
341    for i in 0..n {
342        let mut row_sum = 0.0;
343        for j in 0..n {
344            let u = (x[j] - x[i]) / bandwidth;
345            s[i + j * n] = kernel_fn(u);
346            row_sum += s[i + j * n];
347        }
348        if row_sum > 1e-10 {
349            for j in 0..n {
350                s[i + j * n] /= row_sum;
351            }
352        }
353    }
354
355    s
356}
357
// ─── Cross-Validation for Kernel Smoothers ──────────────────────────────────
359
/// Cross-validation criterion used for bandwidth selection.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum CvCriterion {
    /// Leave-one-out cross-validation (R's `CV.S`).
    Cv,
    /// Generalized cross-validation (R's `GCV.S`).
    Gcv,
}
368
369/// Result of bandwidth optimization.
370#[derive(Debug, Clone)]
371pub struct OptimBandwidthResult {
372    /// Optimal bandwidth.
373    pub h_opt: f64,
374    /// Criterion used.
375    pub criterion: CvCriterion,
376    /// Criterion value at optimal h.
377    pub value: f64,
378}
379
380/// LOO-CV score for a kernel smoother (R's `CV.S`).
381///
382/// Computes the leave-one-out CV score by zeroing the diagonal of the
383/// smoothing matrix, re-normalizing rows, and computing mean squared error.
384///
385/// # Arguments
386/// * `x` — Predictor values
387/// * `y` — Response values
388/// * `bandwidth` — Kernel bandwidth
389/// * `kernel` — Kernel type ("gaussian", "epanechnikov", "tricube")
390///
391/// # Returns
392/// Mean squared LOO prediction error, or `INFINITY` if inputs are invalid.
393pub fn cv_smoother(x: &[f64], y: &[f64], bandwidth: f64, kernel: &str) -> f64 {
394    let n = x.len();
395    if n < 2 || y.len() != n || bandwidth <= 0.0 {
396        return f64::INFINITY;
397    }
398
399    // Get the smoother matrix S
400    let mut s = smoothing_matrix_nw(x, bandwidth, kernel);
401    if s.is_empty() {
402        return f64::INFINITY;
403    }
404
405    // Zero the diagonal → S_cv (LOO smoother)
406    for i in 0..n {
407        s[i + i * n] = 0.0;
408    }
409
410    // Re-normalize each row so it sums to 1
411    for i in 0..n {
412        let row_sum: f64 = (0..n).map(|j| s[i + j * n]).sum();
413        if row_sum > 1e-10 {
414            for j in 0..n {
415                s[i + j * n] /= row_sum;
416            }
417        }
418    }
419
420    // Compute y_hat = S_cv * y, then MSE
421    let mut mse = 0.0;
422    for i in 0..n {
423        let y_hat: f64 = (0..n).map(|j| s[i + j * n] * y[j]).sum();
424        let resid = y[i] - y_hat;
425        mse += resid * resid;
426    }
427    mse / n as f64
428}
429
430/// GCV score for a kernel smoother (R's `GCV.S`).
431///
432/// Computes `(RSS / n) / (1 - tr(S) / n)²`.
433///
434/// # Arguments
435/// * `x` — Predictor values
436/// * `y` — Response values
437/// * `bandwidth` — Kernel bandwidth
438/// * `kernel` — Kernel type
439///
440/// # Returns
441/// GCV score, or `INFINITY` if inputs are invalid or denominator is near zero.
442pub fn gcv_smoother(x: &[f64], y: &[f64], bandwidth: f64, kernel: &str) -> f64 {
443    let n = x.len();
444    if n < 2 || y.len() != n || bandwidth <= 0.0 {
445        return f64::INFINITY;
446    }
447
448    let s = smoothing_matrix_nw(x, bandwidth, kernel);
449    if s.is_empty() {
450        return f64::INFINITY;
451    }
452
453    // y_hat = S * y
454    let mut rss = 0.0;
455    for i in 0..n {
456        let y_hat: f64 = (0..n).map(|j| s[i + j * n] * y[j]).sum();
457        let resid = y[i] - y_hat;
458        rss += resid * resid;
459    }
460
461    // trace(S) = sum of diagonal
462    let trace_s: f64 = (0..n).map(|i| s[i + i * n]).sum();
463
464    let denom = 1.0 - trace_s / n as f64;
465    if denom.abs() < 1e-10 {
466        f64::INFINITY
467    } else {
468        (rss / n as f64) / (denom * denom)
469    }
470}
471
472/// Bandwidth optimizer for kernel smoothers (R's `optim.np`).
473///
474/// Grid search over evenly-spaced bandwidths, selecting the one that
475/// minimizes the specified criterion (CV or GCV).
476///
477/// # Arguments
478/// * `x` — Predictor values
479/// * `y` — Response values
480/// * `h_range` — Optional `(h_min, h_max)`. Defaults to `(h_default / 5, h_default * 5)`
481///   where `h_default = (x_max - x_min) / n^0.2`.
482/// * `criterion` — CV or GCV
483/// * `kernel` — Kernel type
484/// * `n_grid` — Number of grid points (default: 50)
485pub fn optim_bandwidth(
486    x: &[f64],
487    y: &[f64],
488    h_range: Option<(f64, f64)>,
489    criterion: CvCriterion,
490    kernel: &str,
491    n_grid: usize,
492) -> OptimBandwidthResult {
493    let n = x.len();
494    let n_grid = n_grid.max(2);
495
496    // Determine search range
497    let (h_min, h_max) = match h_range {
498        Some((lo, hi)) if lo > 0.0 && hi > lo => (lo, hi),
499        _ => {
500            let x_min = x.iter().cloned().fold(f64::INFINITY, f64::min);
501            let x_max = x.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
502            let h_default = (x_max - x_min) / (n as f64).powf(0.2);
503            let h_default = h_default.max(1e-10);
504            (h_default / 5.0, h_default * 5.0)
505        }
506    };
507
508    let score_fn = match criterion {
509        CvCriterion::Cv => cv_smoother,
510        CvCriterion::Gcv => gcv_smoother,
511    };
512
513    let mut best_h = h_min;
514    let mut best_score = f64::INFINITY;
515
516    for i in 0..n_grid {
517        let h = h_min + (h_max - h_min) * i as f64 / (n_grid - 1) as f64;
518        let score = score_fn(x, y, h, kernel);
519        if score < best_score {
520            best_score = score;
521            best_h = h;
522        }
523    }
524
525    OptimBandwidthResult {
526        h_opt: best_h,
527        criterion,
528        value: best_score,
529    }
530}
531
// ─── kNN CV Functions ───────────────────────────────────────────────────────
533
/// Result of kNN k-selection by cross-validation.
#[derive(Debug, Clone)]
pub struct KnnCvResult {
    /// Optimal k (number of neighbors) minimizing the CV error.
    pub optimal_k: usize,
    /// CV error for each k tested (index 0 corresponds to k = 1).
    pub cv_errors: Vec<f64>,
}
542
543/// Global LOO-CV for kNN k selection (R's `knn.gcv`).
544///
545/// For each candidate k, computes LOO prediction error using a
546/// kernel-weighted kNN estimator with Epanechnikov kernel.
547///
548/// # Arguments
549/// * `x` — Predictor values
550/// * `y` — Response values
551/// * `max_k` — Maximum k to test (tests k = 1, 2, …, max_k)
552pub fn knn_gcv(x: &[f64], y: &[f64], max_k: usize) -> KnnCvResult {
553    let n = x.len();
554    let max_k = max_k.min(n.saturating_sub(1)).max(1);
555
556    // Precompute sorted distances from each point to all others
557    let mut sorted_neighbors: Vec<Vec<(usize, f64)>> = Vec::with_capacity(n);
558    for i in 0..n {
559        let mut dists: Vec<(usize, f64)> = (0..n)
560            .filter(|&j| j != i)
561            .map(|j| (j, (x[j] - x[i]).abs()))
562            .collect();
563        dists.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
564        sorted_neighbors.push(dists);
565    }
566
567    let mut cv_errors = Vec::with_capacity(max_k);
568
569    for k in 1..=max_k {
570        let mut mse = 0.0;
571        for i in 0..n {
572            let neighbors = &sorted_neighbors[i];
573            // Bandwidth: midpoint between k-th and (k+1)-th NN distances
574            let d_k = if k <= neighbors.len() {
575                neighbors[k - 1].1
576            } else {
577                neighbors.last().map(|x| x.1).unwrap_or(1.0)
578            };
579            let d_k1 = if k < neighbors.len() {
580                neighbors[k].1
581            } else {
582                d_k * 2.0
583            };
584            let h = (d_k + d_k1) / 2.0;
585            let h = h.max(1e-10);
586
587            // Epanechnikov kernel weighted prediction
588            let mut num = 0.0;
589            let mut den = 0.0;
590            for &(j, dist) in neighbors.iter().take(k) {
591                let u = dist / h;
592                let w = epanechnikov_kernel(u);
593                num += w * y[j];
594                den += w;
595            }
596            let y_hat = if den > 1e-10 { num / den } else { y[i] };
597            mse += (y[i] - y_hat).powi(2);
598        }
599        cv_errors.push(mse / n as f64);
600    }
601
602    let optimal_k = cv_errors
603        .iter()
604        .enumerate()
605        .min_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
606        .map(|(i, _)| i + 1)
607        .unwrap_or(1);
608
609    KnnCvResult {
610        optimal_k,
611        cv_errors,
612    }
613}
614
/// Local (per-observation) LOO-CV for kNN k selection (R's `knn.lcv`).
///
/// For each observation, independently selects the best k by minimizing
/// the absolute LOO prediction error at that point.
///
/// # Arguments
/// * `x` — Predictor values
/// * `y` — Response values
/// * `max_k` — Maximum k to test (clamped to 1..=n-1)
///
/// # Returns
/// Vector of per-observation optimal k values (length n).
pub fn knn_lcv(x: &[f64], y: &[f64], max_k: usize) -> Vec<usize> {
    let n = x.len();
    let max_k = max_k.min(n.saturating_sub(1)).max(1);

    let mut per_obs_k = vec![1usize; n];

    for i in 0..n {
        // Sort neighbors by distance (excluding self).
        let mut neighbors: Vec<(usize, f64)> = (0..n)
            .filter(|&j| j != i)
            .map(|j| (j, (x[j] - x[i]).abs()))
            .collect();
        neighbors.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));

        let mut best_k = 1;
        let mut best_err = f64::INFINITY;
        // Running sum of the k nearest responses: previously the k-term
        // sum was recomputed from scratch for every k (O(max_k²) per
        // observation); accumulating incrementally is O(max_k) and adds
        // terms in the same left-to-right order, so results are identical.
        let mut sum = 0.0;

        for (idx, &(j, _)) in neighbors.iter().take(max_k).enumerate() {
            sum += y[j];
            let k = idx + 1;
            let y_hat = sum / k as f64;
            let err = (y[i] - y_hat).abs();
            if err < best_err {
                best_err = err;
                best_k = k;
            }
        }
        per_obs_k[i] = best_k;
    }

    per_obs_k
}
659
#[cfg(test)]
mod tests {
    use super::*;

    /// Evenly spaced grid of `n` points on [0, 1] (requires n >= 2).
    fn uniform_grid(n: usize) -> Vec<f64> {
        (0..n).map(|i| i as f64 / (n - 1) as f64).collect()
    }

    // ============== Nadaraya-Watson tests ==============

    #[test]
    fn test_nw_constant_data() {
        let x = uniform_grid(20);
        let y: Vec<f64> = vec![5.0; 20];

        let y_smooth = nadaraya_watson(&x, &y, &x, 0.1, "gaussian");

        // Smoothing constant data should return constant
        for &yi in &y_smooth {
            assert!(
                (yi - 5.0).abs() < 0.1,
                "Constant data should remain constant"
            );
        }
    }

    #[test]
    fn test_nw_linear_data() {
        let x = uniform_grid(50);
        let y: Vec<f64> = x.iter().map(|&xi| 2.0 * xi + 1.0).collect();

        let y_smooth = nadaraya_watson(&x, &y, &x, 0.2, "gaussian");

        // Linear data should be approximately preserved (with some edge effects)
        for i in 10..40 {
            let expected = 2.0 * x[i] + 1.0;
            assert!(
                (y_smooth[i] - expected).abs() < 0.2,
                "Linear trend should be approximately preserved"
            );
        }
    }

    #[test]
    fn test_nw_gaussian_vs_epanechnikov() {
        let x = uniform_grid(30);
        let y: Vec<f64> = x
            .iter()
            .map(|&xi| (2.0 * std::f64::consts::PI * xi).sin())
            .collect();

        let y_gauss = nadaraya_watson(&x, &y, &x, 0.1, "gaussian");
        let y_epan = nadaraya_watson(&x, &y, &x, 0.1, "epanechnikov");

        // Both should produce valid output
        assert_eq!(y_gauss.len(), 30);
        assert_eq!(y_epan.len(), 30);

        // They should be different (different kernels)
        let diff: f64 = y_gauss
            .iter()
            .zip(&y_epan)
            .map(|(a, b)| (a - b).abs())
            .sum();
        assert!(
            diff > 0.0,
            "Different kernels should give different results"
        );
    }

    #[test]
    fn test_nw_invalid_input() {
        // Empty input
        let result = nadaraya_watson(&[], &[], &[0.5], 0.1, "gaussian");
        assert_eq!(result, vec![0.0]);

        // Mismatched lengths
        let result = nadaraya_watson(&[0.0, 1.0], &[1.0], &[0.5], 0.1, "gaussian");
        assert_eq!(result, vec![0.0]);

        // Zero bandwidth
        let result = nadaraya_watson(&[0.0, 1.0], &[1.0, 2.0], &[0.5], 0.0, "gaussian");
        assert_eq!(result, vec![0.0]);
    }

    // ============== Local linear tests ==============

    #[test]
    fn test_ll_constant_data() {
        let x = uniform_grid(20);
        let y: Vec<f64> = vec![3.0; 20];

        let y_smooth = local_linear(&x, &y, &x, 0.15, "gaussian");

        for &yi in &y_smooth {
            assert!((yi - 3.0).abs() < 0.1, "Constant should remain constant");
        }
    }

    #[test]
    fn test_ll_linear_data_exact() {
        let x = uniform_grid(30);
        let y: Vec<f64> = x.iter().map(|&xi| 3.0 * xi + 2.0).collect();

        let y_smooth = local_linear(&x, &y, &x, 0.2, "gaussian");

        // Local linear should fit linear data exactly (in interior)
        for i in 5..25 {
            let expected = 3.0 * x[i] + 2.0;
            assert!(
                (y_smooth[i] - expected).abs() < 0.1,
                "Local linear should fit linear data well"
            );
        }
    }

    #[test]
    fn test_ll_invalid_input() {
        let result = local_linear(&[], &[], &[0.5], 0.1, "gaussian");
        assert_eq!(result, vec![0.0]);

        let result = local_linear(&[0.0, 1.0], &[1.0, 2.0], &[0.5], -0.1, "gaussian");
        assert_eq!(result, vec![0.0]);
    }

    // ============== Local polynomial tests ==============

    #[test]
    fn test_lp_degree1_equals_local_linear() {
        let x = uniform_grid(25);
        let y: Vec<f64> = x.iter().map(|&xi| xi * xi).collect();

        let y_ll = local_linear(&x, &y, &x, 0.15, "gaussian");
        let y_lp = local_polynomial(&x, &y, &x, 0.15, 1, "gaussian");

        // Degree 1 delegates to local_linear, so results must match exactly.
        for i in 0..25 {
            assert!(
                (y_ll[i] - y_lp[i]).abs() < 1e-10,
                "Degree 1 should equal local linear"
            );
        }
    }

    #[test]
    fn test_lp_quadratic_data() {
        let x = uniform_grid(40);
        let y: Vec<f64> = x.iter().map(|&xi| xi * xi).collect();

        let y_smooth = local_polynomial(&x, &y, &x, 0.15, 2, "gaussian");

        // Local quadratic should fit quadratic data well in interior
        for i in 8..32 {
            let expected = x[i] * x[i];
            assert!(
                (y_smooth[i] - expected).abs() < 0.1,
                "Local quadratic should fit quadratic data"
            );
        }
    }

    #[test]
    fn test_lp_invalid_input() {
        // Zero degree
        let result = local_polynomial(&[0.0, 1.0], &[1.0, 2.0], &[0.5], 0.1, 0, "gaussian");
        assert_eq!(result, vec![0.0]);

        // Empty input
        let result = local_polynomial(&[], &[], &[0.5], 0.1, 2, "gaussian");
        assert_eq!(result, vec![0.0]);
    }

    // ============== KNN smoother tests ==============

    #[test]
    fn test_knn_k1_nearest() {
        let x = vec![0.0, 0.5, 1.0];
        let y = vec![1.0, 2.0, 3.0];

        let result = knn_smoother(&x, &y, &[0.1, 0.6, 0.9], 1);

        // k=1 should return the nearest neighbor's y value
        assert!((result[0] - 1.0).abs() < 1e-10, "0.1 nearest to 0.0 -> 1.0");
        assert!((result[1] - 2.0).abs() < 1e-10, "0.6 nearest to 0.5 -> 2.0");
        assert!((result[2] - 3.0).abs() < 1e-10, "0.9 nearest to 1.0 -> 3.0");
    }

    #[test]
    fn test_knn_k_equals_n_is_mean() {
        let x = vec![0.0, 0.25, 0.5, 0.75, 1.0];
        let y = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let expected_mean = 3.0;

        let result = knn_smoother(&x, &y, &[0.5], 5);

        assert!(
            (result[0] - expected_mean).abs() < 1e-10,
            "k=n should return mean"
        );
    }

    #[test]
    fn test_knn_invalid_input() {
        let result = knn_smoother(&[], &[], &[0.5], 3);
        assert_eq!(result, vec![0.0]);

        let result = knn_smoother(&[0.0, 1.0], &[1.0, 2.0], &[0.5], 0);
        assert_eq!(result, vec![0.0]);
    }

    // ============== Smoothing matrix tests ==============

    #[test]
    fn test_smoothing_matrix_row_stochastic() {
        let x = uniform_grid(10);
        let s = smoothing_matrix_nw(&x, 0.2, "gaussian");

        assert_eq!(s.len(), 100);

        // Each row should sum to 1 (row stochastic)
        // Note: the matrix is stored column-major, so row i is s[i + j*n].
        for i in 0..10 {
            let row_sum: f64 = (0..10).map(|j| s[i + j * 10]).sum();
            assert!(
                (row_sum - 1.0).abs() < 1e-10,
                "Row {} should sum to 1, got {}",
                i,
                row_sum
            );
        }
    }

    #[test]
    fn test_smoothing_matrix_invalid_input() {
        let result = smoothing_matrix_nw(&[], 0.1, "gaussian");
        assert!(result.is_empty());

        let result = smoothing_matrix_nw(&[0.0, 1.0], 0.0, "gaussian");
        assert!(result.is_empty());
    }

    #[test]
    fn test_nan_nw_no_panic() {
        let x = vec![0.0, 0.25, 0.5, 0.75, 1.0];
        let mut y = vec![0.0, 1.0, 2.0, 1.0, 0.0];
        y[2] = f64::NAN;
        let result = nadaraya_watson(&x, &y, &x, 0.3, "gaussian");
        assert_eq!(result.len(), x.len());
        // NaN should propagate but not panic
    }

    #[test]
    fn test_n1_smoother() {
        // Single data point
        let x = vec![0.5];
        let y = vec![3.0];
        let x_new = vec![0.5];
        let result = nadaraya_watson(&x, &y, &x_new, 0.3, "gaussian");
        assert_eq!(result.len(), 1);
        assert!(
            (result[0] - 3.0).abs() < 1e-6,
            "Single point smoother should return the value"
        );
    }

    #[test]
    fn test_duplicate_x_smoother() {
        // Duplicate x values
        let x = vec![0.0, 0.0, 0.5, 1.0, 1.0];
        let y = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let x_new = vec![0.0, 0.5, 1.0];
        let result = nadaraya_watson(&x, &y, &x_new, 0.3, "gaussian");
        assert_eq!(result.len(), 3);
        for v in &result {
            assert!(v.is_finite());
        }
    }

    // ============== CV smoother tests ==============

    #[test]
    fn test_cv_smoother_linear_data() {
        let x = uniform_grid(30);
        let y: Vec<f64> = x.iter().map(|&xi| 2.0 * xi + 1.0).collect();
        let cv = cv_smoother(&x, &y, 0.2, "gaussian");
        assert!(cv.is_finite());
        assert!(cv >= 0.0);
        assert!(cv < 1.0, "CV error for smooth linear data should be small");
    }

    #[test]
    fn test_cv_smoother_invalid() {
        assert_eq!(cv_smoother(&[], &[], 0.1, "gaussian"), f64::INFINITY);
        assert_eq!(
            cv_smoother(&[0.0, 1.0], &[1.0, 2.0], -0.1, "gaussian"),
            f64::INFINITY
        );
    }

    #[test]
    fn test_gcv_smoother_linear_data() {
        let x = uniform_grid(30);
        let y: Vec<f64> = x.iter().map(|&xi| 2.0 * xi + 1.0).collect();
        let gcv = gcv_smoother(&x, &y, 0.2, "gaussian");
        assert!(gcv.is_finite());
        assert!(gcv >= 0.0);
    }

    #[test]
    fn test_gcv_smoother_invalid() {
        assert_eq!(gcv_smoother(&[], &[], 0.1, "gaussian"), f64::INFINITY);
    }

    #[test]
    fn test_optim_bandwidth_returns_valid() {
        let x = uniform_grid(30);
        let y: Vec<f64> = x
            .iter()
            .map(|&xi| (2.0 * std::f64::consts::PI * xi).sin())
            .collect();

        let result = optim_bandwidth(&x, &y, None, CvCriterion::Gcv, "gaussian", 20);
        assert!(result.h_opt > 0.0);
        assert!(result.value.is_finite());
        assert_eq!(result.criterion, CvCriterion::Gcv);
    }

    #[test]
    fn test_optim_bandwidth_cv_vs_gcv() {
        let x = uniform_grid(25);
        let y: Vec<f64> = x.iter().map(|&xi| xi * xi).collect();

        let cv_result = optim_bandwidth(&x, &y, None, CvCriterion::Cv, "gaussian", 20);
        let gcv_result = optim_bandwidth(&x, &y, None, CvCriterion::Gcv, "gaussian", 20);

        assert!(cv_result.h_opt > 0.0);
        assert!(gcv_result.h_opt > 0.0);
    }

    #[test]
    fn test_optim_bandwidth_custom_range() {
        let x = uniform_grid(20);
        let y: Vec<f64> = x.to_vec();
        let result = optim_bandwidth(&x, &y, Some((0.05, 0.5)), CvCriterion::Cv, "gaussian", 10);
        assert!(result.h_opt >= 0.05);
        assert!(result.h_opt <= 0.5);
    }

    // ============== kNN CV tests ==============

    #[test]
    fn test_knn_gcv_returns_valid() {
        let x = uniform_grid(20);
        let y: Vec<f64> = x.iter().map(|&xi| xi * xi).collect();

        let result = knn_gcv(&x, &y, 10);
        assert!(result.optimal_k >= 1);
        assert!(result.optimal_k <= 10);
        assert_eq!(result.cv_errors.len(), 10);
        for &e in &result.cv_errors {
            assert!(e.is_finite());
            assert!(e >= 0.0);
        }
    }

    #[test]
    fn test_knn_gcv_constant_data() {
        let x = uniform_grid(15);
        let y = vec![5.0; 15];
        let result = knn_gcv(&x, &y, 5);
        // For constant data, error should be near zero for any k
        for &e in &result.cv_errors {
            assert!(e < 0.01, "Constant data: CV error should be near zero");
        }
    }

    #[test]
    fn test_knn_lcv_returns_valid() {
        let x = uniform_grid(15);
        let y: Vec<f64> = x.to_vec();

        let result = knn_lcv(&x, &y, 5);
        assert_eq!(result.len(), 15);
        for &k in &result {
            assert!(k >= 1);
            assert!(k <= 5);
        }
    }

    #[test]
    fn test_knn_lcv_constant_data() {
        let x = uniform_grid(10);
        let y = vec![3.0; 10];
        let result = knn_lcv(&x, &y, 5);
        assert_eq!(result.len(), 10);
        // For constant data, all k values should give zero error
        // so k=1 is optimal (first tested)
        for &k in &result {
            assert!(k >= 1);
        }
    }
}