// fdars_core — smoothing.rs
1//! Smoothing functions for functional data.
2//!
3//! This module provides kernel-based smoothing methods including
4//! Nadaraya-Watson, local linear, and local polynomial regression.
5
6use crate::slice_maybe_parallel;
7#[cfg(feature = "parallel")]
8use rayon::iter::ParallelIterator;
9
/// Standard Gaussian kernel: exp(-u²/2) / √(2π).
fn gaussian_kernel(u: f64) -> f64 {
    let norm = (2.0 * std::f64::consts::PI).sqrt();
    (-(u * u) / 2.0).exp() / norm
}
14
/// Epanechnikov kernel: 0.75·(1 − u²) for |u| ≤ 1, zero outside the support.
fn epanechnikov_kernel(u: f64) -> f64 {
    if u.abs() > 1.0 {
        return 0.0;
    }
    0.75 * (1.0 - u * u)
}
23
/// Tri-cube kernel (the loess weight): (1 − |u|³)³ for |u| < 1, zero elsewhere.
fn tricube_kernel(u: f64) -> f64 {
    let a = u.abs();
    if a >= 1.0 {
        return 0.0;
    }
    let core = 1.0 - a.powi(3);
    core.powi(3)
}
33
34/// Get kernel function by name.
35fn get_kernel(kernel_type: &str) -> fn(f64) -> f64 {
36    match kernel_type.to_lowercase().as_str() {
37        "epanechnikov" | "epan" => epanechnikov_kernel,
38        "tricube" | "tri-cube" => tricube_kernel,
39        _ => gaussian_kernel,
40    }
41}
42
43/// Nadaraya-Watson kernel smoother.
44///
45/// # Arguments
46/// * `x` - Predictor values
47/// * `y` - Response values
48/// * `x_new` - Points at which to evaluate the smoother
49/// * `bandwidth` - Kernel bandwidth
50/// * `kernel` - Kernel type ("gaussian" or "epanechnikov")
51///
52/// # Returns
53/// Smoothed values at x_new
54pub fn nadaraya_watson(
55    x: &[f64],
56    y: &[f64],
57    x_new: &[f64],
58    bandwidth: f64,
59    kernel: &str,
60) -> Vec<f64> {
61    let n = x.len();
62    if n == 0 || y.len() != n || x_new.is_empty() || bandwidth <= 0.0 {
63        return vec![0.0; x_new.len()];
64    }
65
66    let kernel_fn = get_kernel(kernel);
67
68    slice_maybe_parallel!(x_new)
69        .map(|&x0| {
70            let mut num = 0.0;
71            let mut denom = 0.0;
72
73            for i in 0..n {
74                let u = (x[i] - x0) / bandwidth;
75                let w = kernel_fn(u);
76                num += w * y[i];
77                denom += w;
78            }
79
80            if denom > 1e-10 {
81                num / denom
82            } else {
83                0.0
84            }
85        })
86        .collect()
87}
88
89/// Local linear regression smoother.
90///
91/// # Arguments
92/// * `x` - Predictor values
93/// * `y` - Response values
94/// * `x_new` - Points at which to evaluate the smoother
95/// * `bandwidth` - Kernel bandwidth
96/// * `kernel` - Kernel type
97///
98/// # Returns
99/// Smoothed values at x_new
100pub fn local_linear(x: &[f64], y: &[f64], x_new: &[f64], bandwidth: f64, kernel: &str) -> Vec<f64> {
101    let n = x.len();
102    if n == 0 || y.len() != n || x_new.is_empty() || bandwidth <= 0.0 {
103        return vec![0.0; x_new.len()];
104    }
105
106    let kernel_fn = get_kernel(kernel);
107
108    slice_maybe_parallel!(x_new)
109        .map(|&x0| {
110            // Compute weighted moments
111            let mut s0 = 0.0;
112            let mut s1 = 0.0;
113            let mut s2 = 0.0;
114            let mut t0 = 0.0;
115            let mut t1 = 0.0;
116
117            for i in 0..n {
118                let u = (x[i] - x0) / bandwidth;
119                let w = kernel_fn(u);
120                let d = x[i] - x0;
121
122                s0 += w;
123                s1 += w * d;
124                s2 += w * d * d;
125                t0 += w * y[i];
126                t1 += w * y[i] * d;
127            }
128
129            // Solve local linear regression
130            let det = s0 * s2 - s1 * s1;
131            if det.abs() > 1e-10 {
132                (s2 * t0 - s1 * t1) / det
133            } else if s0 > 1e-10 {
134                t0 / s0
135            } else {
136                0.0
137            }
138        })
139        .collect()
140}
141
/// Accumulate weighted normal equations (X'WX and X'Wy) for a local
/// polynomial fit of order `p - 1` centred at `x0`.
///
/// Each observation contributes the design row `[1, d, d², …, d^(p-1)]`
/// (with `d = x_i − x0`) weighted by `kernel_fn((x_i − x0) / bandwidth)`.
///
/// # Returns
/// `(xtx, xty)`: the p×p matrix X'WX in row-major order and the length-p
/// vector X'Wy.
fn accumulate_weighted_normal_equations(
    x: &[f64],
    y: &[f64],
    x0: f64,
    bandwidth: f64,
    p: usize,
    kernel_fn: impl Fn(f64) -> f64,
) -> (Vec<f64>, Vec<f64>) {
    let mut xtx = vec![0.0; p * p];
    let mut xty = vec![0.0; p];
    // Scratch buffer of powers d^0 .. d^(p-1), filled once per observation.
    // The original recomputed `d.powi(..)` inside the j/k loops, costing
    // O(p²) power evaluations per point; this brings it down to O(p).
    let mut dpow = vec![0.0; p];

    for (&xi, &yi) in x.iter().zip(y.iter()) {
        let d = xi - x0;
        let w = kernel_fn((xi - x0) / bandwidth);

        dpow[0] = 1.0;
        for m in 1..p {
            dpow[m] = dpow[m - 1] * d;
        }

        for j in 0..p {
            let w_dj = w * dpow[j];
            for k in 0..p {
                xtx[j * p + k] += w_dj * dpow[k];
            }
            xty[j] += w_dj * yi;
        }
    }

    (xtx, xty)
}
171
/// Find the partial-pivoting pivot row: the row at or below the diagonal
/// whose entry in column `col` has the largest absolute value.
///
/// `a` is a p×p matrix in row-major order.
///
/// (The previous doc comment here described the full "solve a linear
/// system" routine — leftover from before this helper was split out.)
fn find_pivot(a: &[f64], p: usize, col: usize) -> usize {
    let mut best = col;
    // Cache the current best magnitude instead of re-reading it per row.
    let mut best_abs = a[best * p + col].abs();
    for row in (col + 1)..p {
        let candidate = a[row * p + col].abs();
        if candidate > best_abs {
            best = row;
            best_abs = candidate;
        }
    }
    best
}
185
/// Exchange rows `row_a` and `row_b` of the p×p row-major matrix `a`,
/// and the matching entries of the right-hand side `b`.
fn swap_rows(a: &mut [f64], b: &mut [f64], p: usize, row_a: usize, row_b: usize) {
    let (base_a, base_b) = (row_a * p, row_b * p);
    for col in 0..p {
        a.swap(base_a + col, base_b + col);
    }
    b.swap(row_a, row_b);
}
193
/// Elimination step of Gaussian elimination: zero out column `pivot_row`
/// in every row below the pivot by subtracting a scaled copy of the pivot
/// row, updating the RHS `b` consistently.
fn eliminate_below(a: &mut [f64], b: &mut [f64], p: usize, pivot_row: usize) {
    let pivot = a[pivot_row * p + pivot_row];
    for row in (pivot_row + 1)..p {
        let factor = a[row * p + pivot_row] / pivot;
        // Columns left of the pivot are already zero; start at pivot_row.
        for col in pivot_row..p {
            a[row * p + col] -= factor * a[pivot_row * p + col];
        }
        b[row] -= factor * b[pivot_row];
    }
}
205
206fn forward_eliminate(a: &mut [f64], b: &mut [f64], p: usize) {
207    for i in 0..p {
208        let max_idx = find_pivot(a, p, i);
209        if max_idx != i {
210            swap_rows(a, b, p, i, max_idx);
211        }
212
213        if a[i * p + i].abs() < 1e-10 {
214            continue;
215        }
216
217        eliminate_below(a, b, p, i);
218    }
219}
220
/// Back substitution for an upper-triangular system `a·x = b`.
/// A row with a near-zero diagonal leaves its coefficient at 0.0.
fn back_substitute(a: &[f64], b: &[f64], p: usize) -> Vec<f64> {
    let mut coefs = vec![0.0; p];
    for row in (0..p).rev() {
        // Subtract already-solved terms from the RHS, one at a time.
        let mut rhs = b[row];
        for col in (row + 1)..p {
            rhs -= a[row * p + col] * coefs[col];
        }
        let diag = a[row * p + row];
        if diag.abs() > 1e-10 {
            coefs[row] = rhs / diag;
        }
    }
    coefs
}
235
/// Solve the p×p linear system `a·x = b` in place: forward elimination with
/// partial pivoting followed by back substitution. Numerically singular
/// pivots are skipped, leaving zeros in the affected coefficients rather
/// than returning an error.
fn solve_gaussian(a: &mut [f64], b: &mut [f64], p: usize) -> Vec<f64> {
    forward_eliminate(a, b, p);
    back_substitute(a, b, p)
}
240
/// Solve a linear system Ax = b via Gaussian elimination with partial pivoting.
///
/// Public wrapper for use by other modules (e.g., `fregre_basis_cv`).
/// `a` is a p×p matrix in row-major order, `b` is the RHS vector of length p.
/// Both are modified in place.
///
/// # Returns
/// The solution vector of length `p`. Coefficients tied to near-zero pivots
/// are left at 0.0 when the system is numerically singular (no error is
/// reported).
pub fn solve_gaussian_pub(a: &mut [f64], b: &mut [f64], p: usize) -> Vec<f64> {
    solve_gaussian(a, b, p)
}
249
250/// Local polynomial regression smoother.
251///
252/// # Arguments
253/// * `x` - Predictor values
254/// * `y` - Response values
255/// * `x_new` - Points at which to evaluate the smoother
256/// * `bandwidth` - Kernel bandwidth
257/// * `degree` - Polynomial degree
258/// * `kernel` - Kernel type
259///
260/// # Returns
261/// Smoothed values at x_new
262pub fn local_polynomial(
263    x: &[f64],
264    y: &[f64],
265    x_new: &[f64],
266    bandwidth: f64,
267    degree: usize,
268    kernel: &str,
269) -> Vec<f64> {
270    let n = x.len();
271    if n == 0 || y.len() != n || x_new.is_empty() || bandwidth <= 0.0 {
272        return vec![0.0; x_new.len()];
273    }
274    if degree == 0 {
275        return nadaraya_watson(x, y, x_new, bandwidth, kernel);
276    }
277
278    if degree == 1 {
279        return local_linear(x, y, x_new, bandwidth, kernel);
280    }
281
282    let kernel_fn = get_kernel(kernel);
283    let p = degree + 1; // Number of coefficients
284
285    slice_maybe_parallel!(x_new)
286        .map(|&x0| {
287            let (mut xtx, mut xty) =
288                accumulate_weighted_normal_equations(x, y, x0, bandwidth, p, kernel_fn);
289            let coefs = solve_gaussian(&mut xtx, &mut xty, p);
290            coefs[0]
291        })
292        .collect()
293}
294
295/// k-Nearest Neighbors smoother.
296///
297/// # Arguments
298/// * `x` - Predictor values
299/// * `y` - Response values
300/// * `x_new` - Points at which to evaluate the smoother
301/// * `k` - Number of neighbors
302///
303/// # Returns
304/// Smoothed values at x_new
305pub fn knn_smoother(x: &[f64], y: &[f64], x_new: &[f64], k: usize) -> Vec<f64> {
306    let n = x.len();
307    if n == 0 || y.len() != n || x_new.is_empty() || k == 0 {
308        return vec![0.0; x_new.len()];
309    }
310
311    let k = k.min(n);
312
313    slice_maybe_parallel!(x_new)
314        .map(|&x0| {
315            // Compute distances
316            let mut distances: Vec<(usize, f64)> = x
317                .iter()
318                .enumerate()
319                .map(|(i, &xi)| (i, (xi - x0).abs()))
320                .collect();
321
322            // Partial sort to get k nearest
323            distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
324
325            // Average of k nearest neighbors
326            let sum: f64 = distances.iter().take(k).map(|(i, _)| y[*i]).sum();
327            sum / k as f64
328        })
329        .collect()
330}
331
332/// Compute smoothing matrix for Nadaraya-Watson.
333///
334/// Returns the smoother matrix S such that y_hat = S * y.
335pub fn smoothing_matrix_nw(x: &[f64], bandwidth: f64, kernel: &str) -> Vec<f64> {
336    let n = x.len();
337    if n == 0 || bandwidth <= 0.0 {
338        return Vec::new();
339    }
340
341    let kernel_fn = get_kernel(kernel);
342    let mut s = vec![0.0; n * n];
343
344    for i in 0..n {
345        let mut row_sum = 0.0;
346        for j in 0..n {
347            let u = (x[j] - x[i]) / bandwidth;
348            s[i + j * n] = kernel_fn(u);
349            row_sum += s[i + j * n];
350        }
351        if row_sum > 1e-10 {
352            for j in 0..n {
353                s[i + j * n] /= row_sum;
354            }
355        }
356    }
357
358    s
359}
360
361// ─── Cross-Validation for Kernel Smoothers ──────────────────────────────────
362
/// CV criterion type for bandwidth selection.
//
// `Eq` is derived alongside `PartialEq`: the variants carry no data, so
// equality is total (clippy::derive_partial_eq_without_eq).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CvCriterion {
    /// Leave-one-out cross-validation (R's `CV.S`).
    Cv,
    /// Generalized cross-validation (R's `GCV.S`).
    Gcv,
}
371
/// Result of bandwidth optimization.
#[derive(Debug, Clone)]
pub struct OptimBandwidthResult {
    /// Optimal bandwidth (the grid point that minimized the criterion).
    pub h_opt: f64,
    /// Criterion used (CV or GCV).
    pub criterion: CvCriterion,
    /// Criterion value at optimal h (may be `INFINITY` if every candidate
    /// bandwidth scored as invalid).
    pub value: f64,
}
382
383/// LOO-CV score for a kernel smoother (R's `CV.S`).
384///
385/// Computes the leave-one-out CV score by zeroing the diagonal of the
386/// smoothing matrix, re-normalizing rows, and computing mean squared error.
387///
388/// # Arguments
389/// * `x` — Predictor values
390/// * `y` — Response values
391/// * `bandwidth` — Kernel bandwidth
392/// * `kernel` — Kernel type ("gaussian", "epanechnikov", "tricube")
393///
394/// # Returns
395/// Mean squared LOO prediction error, or `INFINITY` if inputs are invalid.
396pub fn cv_smoother(x: &[f64], y: &[f64], bandwidth: f64, kernel: &str) -> f64 {
397    let n = x.len();
398    if n < 2 || y.len() != n || bandwidth <= 0.0 {
399        return f64::INFINITY;
400    }
401
402    // Get the smoother matrix S
403    let mut s = smoothing_matrix_nw(x, bandwidth, kernel);
404    if s.is_empty() {
405        return f64::INFINITY;
406    }
407
408    // Zero the diagonal → S_cv (LOO smoother)
409    for i in 0..n {
410        s[i + i * n] = 0.0;
411    }
412
413    // Re-normalize each row so it sums to 1
414    for i in 0..n {
415        let row_sum: f64 = (0..n).map(|j| s[i + j * n]).sum();
416        if row_sum > 1e-10 {
417            for j in 0..n {
418                s[i + j * n] /= row_sum;
419            }
420        }
421    }
422
423    // Compute y_hat = S_cv * y, then MSE
424    let mut mse = 0.0;
425    for i in 0..n {
426        let y_hat: f64 = (0..n).map(|j| s[i + j * n] * y[j]).sum();
427        let resid = y[i] - y_hat;
428        mse += resid * resid;
429    }
430    mse / n as f64
431}
432
433/// GCV score for a kernel smoother (R's `GCV.S`).
434///
435/// Computes `(RSS / n) / (1 - tr(S) / n)²`.
436///
437/// # Arguments
438/// * `x` — Predictor values
439/// * `y` — Response values
440/// * `bandwidth` — Kernel bandwidth
441/// * `kernel` — Kernel type
442///
443/// # Returns
444/// GCV score, or `INFINITY` if inputs are invalid or denominator is near zero.
445pub fn gcv_smoother(x: &[f64], y: &[f64], bandwidth: f64, kernel: &str) -> f64 {
446    let n = x.len();
447    if n < 2 || y.len() != n || bandwidth <= 0.0 {
448        return f64::INFINITY;
449    }
450
451    let s = smoothing_matrix_nw(x, bandwidth, kernel);
452    if s.is_empty() {
453        return f64::INFINITY;
454    }
455
456    // y_hat = S * y
457    let mut rss = 0.0;
458    for i in 0..n {
459        let y_hat: f64 = (0..n).map(|j| s[i + j * n] * y[j]).sum();
460        let resid = y[i] - y_hat;
461        rss += resid * resid;
462    }
463
464    // trace(S) = sum of diagonal
465    let trace_s: f64 = (0..n).map(|i| s[i + i * n]).sum();
466
467    let denom = 1.0 - trace_s / n as f64;
468    if denom.abs() < 1e-10 {
469        f64::INFINITY
470    } else {
471        (rss / n as f64) / (denom * denom)
472    }
473}
474
475/// Bandwidth optimizer for kernel smoothers (R's `optim.np`).
476///
477/// Grid search over evenly-spaced bandwidths, selecting the one that
478/// minimizes the specified criterion (CV or GCV).
479///
480/// # Arguments
481/// * `x` — Predictor values
482/// * `y` — Response values
483/// * `h_range` — Optional `(h_min, h_max)`. Defaults to `(h_default / 5, h_default * 5)`
484///   where `h_default = (x_max - x_min) / n^0.2`.
485/// * `criterion` — CV or GCV
486/// * `kernel` — Kernel type
487/// * `n_grid` — Number of grid points (default: 50)
488pub fn optim_bandwidth(
489    x: &[f64],
490    y: &[f64],
491    h_range: Option<(f64, f64)>,
492    criterion: CvCriterion,
493    kernel: &str,
494    n_grid: usize,
495) -> OptimBandwidthResult {
496    let n = x.len();
497    let n_grid = n_grid.max(2);
498
499    // Determine search range
500    let (h_min, h_max) = match h_range {
501        Some((lo, hi)) if lo > 0.0 && hi > lo => (lo, hi),
502        _ => {
503            let x_min = x.iter().cloned().fold(f64::INFINITY, f64::min);
504            let x_max = x.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
505            let h_default = (x_max - x_min) / (n as f64).powf(0.2);
506            let h_default = h_default.max(1e-10);
507            (h_default / 5.0, h_default * 5.0)
508        }
509    };
510
511    let score_fn = match criterion {
512        CvCriterion::Cv => cv_smoother,
513        CvCriterion::Gcv => gcv_smoother,
514    };
515
516    let mut best_h = h_min;
517    let mut best_score = f64::INFINITY;
518
519    for i in 0..n_grid {
520        let h = h_min + (h_max - h_min) * i as f64 / (n_grid - 1) as f64;
521        let score = score_fn(x, y, h, kernel);
522        if score < best_score {
523            best_score = score;
524            best_h = h;
525        }
526    }
527
528    OptimBandwidthResult {
529        h_opt: best_h,
530        criterion,
531        value: best_score,
532    }
533}
534
535// ─── kNN CV Functions ───────────────────────────────────────────────────────
536
/// Result of kNN k-selection by cross-validation.
#[derive(Debug, Clone)]
pub struct KnnCvResult {
    /// Optimal k (number of neighbors) — the candidate with the smallest CV error.
    pub optimal_k: usize,
    /// CV error for each k tested (index 0 = k=1, index i = k=i+1).
    pub cv_errors: Vec<f64>,
}
545
546/// Global LOO-CV for kNN k selection (R's `knn.gcv`).
547///
548/// For each candidate k, computes LOO prediction error using a
549/// kernel-weighted kNN estimator with Epanechnikov kernel.
550///
551/// # Arguments
552/// * `x` — Predictor values
553/// * `y` — Response values
554/// * `max_k` — Maximum k to test (tests k = 1, 2, …, max_k)
555pub fn knn_gcv(x: &[f64], y: &[f64], max_k: usize) -> KnnCvResult {
556    let n = x.len();
557    let max_k = max_k.min(n.saturating_sub(1)).max(1);
558
559    // Precompute sorted distances from each point to all others
560    let mut sorted_neighbors: Vec<Vec<(usize, f64)>> = Vec::with_capacity(n);
561    for i in 0..n {
562        let mut dists: Vec<(usize, f64)> = (0..n)
563            .filter(|&j| j != i)
564            .map(|j| (j, (x[j] - x[i]).abs()))
565            .collect();
566        dists.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
567        sorted_neighbors.push(dists);
568    }
569
570    let mut cv_errors = Vec::with_capacity(max_k);
571
572    for k in 1..=max_k {
573        let mut mse = 0.0;
574        for i in 0..n {
575            let neighbors = &sorted_neighbors[i];
576            // Bandwidth: midpoint between k-th and (k+1)-th NN distances
577            let d_k = if k <= neighbors.len() {
578                neighbors[k - 1].1
579            } else {
580                neighbors.last().map(|x| x.1).unwrap_or(1.0)
581            };
582            let d_k1 = if k < neighbors.len() {
583                neighbors[k].1
584            } else {
585                d_k * 2.0
586            };
587            let h = (d_k + d_k1) / 2.0;
588            let h = h.max(1e-10);
589
590            // Epanechnikov kernel weighted prediction
591            let mut num = 0.0;
592            let mut den = 0.0;
593            for &(j, dist) in neighbors.iter().take(k) {
594                let u = dist / h;
595                let w = epanechnikov_kernel(u);
596                num += w * y[j];
597                den += w;
598            }
599            let y_hat = if den > 1e-10 { num / den } else { y[i] };
600            mse += (y[i] - y_hat).powi(2);
601        }
602        cv_errors.push(mse / n as f64);
603    }
604
605    let optimal_k = cv_errors
606        .iter()
607        .enumerate()
608        .min_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
609        .map(|(i, _)| i + 1)
610        .unwrap_or(1);
611
612    KnnCvResult {
613        optimal_k,
614        cv_errors,
615    }
616}
617
/// Local (per-observation) LOO-CV for kNN k selection (R's `knn.lcv`).
///
/// For each observation, independently selects the best k by minimizing
/// the absolute LOO prediction error at that point.
///
/// # Arguments
/// * `x` — Predictor values
/// * `y` — Response values
/// * `max_k` — Maximum k to test
///
/// # Returns
/// Vector of per-observation optimal k values (length n).
pub fn knn_lcv(x: &[f64], y: &[f64], max_k: usize) -> Vec<usize> {
    let n = x.len();
    let max_k = max_k.min(n.saturating_sub(1)).max(1);

    (0..n)
        .map(|i| {
            // All other observations, sorted by distance to x[i].
            let mut neighbors: Vec<(usize, f64)> = (0..n)
                .filter(|&j| j != i)
                .map(|j| (j, (x[j] - x[i]).abs()))
                .collect();
            neighbors.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));

            // A running sum over the sorted neighbors yields exactly the
            // same sequential partial sums as re-summing for every k.
            let mut best_k = 1;
            let mut best_err = f64::INFINITY;
            let mut running_sum = 0.0;
            for (idx, &(j, _)) in neighbors.iter().take(max_k).enumerate() {
                running_sum += y[j];
                let k = idx + 1;
                let err = (y[i] - running_sum / k as f64).abs();
                if err < best_err {
                    best_err = err;
                    best_k = k;
                }
            }
            best_k
        })
        .collect()
}
662
// Unit tests exercising the smoothers, CV scores, and k-selection helpers.
#[cfg(test)]
mod tests {
    use super::*;

    /// Evenly spaced grid of n points on [0, 1].
    /// (Intended for n >= 2; n == 1 divides 0 by 0 and yields a NaN entry.)
    fn uniform_grid(n: usize) -> Vec<f64> {
        (0..n).map(|i| i as f64 / (n - 1) as f64).collect()
    }

    // ============== Nadaraya-Watson tests ==============

    #[test]
    fn test_nw_constant_data() {
        let x = uniform_grid(20);
        let y: Vec<f64> = vec![5.0; 20];

        let y_smooth = nadaraya_watson(&x, &y, &x, 0.1, "gaussian");

        // Smoothing constant data should return constant
        for &yi in &y_smooth {
            assert!(
                (yi - 5.0).abs() < 0.1,
                "Constant data should remain constant"
            );
        }
    }

    #[test]
    fn test_nw_linear_data() {
        let x = uniform_grid(50);
        let y: Vec<f64> = x.iter().map(|&xi| 2.0 * xi + 1.0).collect();

        let y_smooth = nadaraya_watson(&x, &y, &x, 0.2, "gaussian");

        // Linear data should be approximately preserved (with some edge effects)
        for i in 10..40 {
            let expected = 2.0 * x[i] + 1.0;
            assert!(
                (y_smooth[i] - expected).abs() < 0.2,
                "Linear trend should be approximately preserved"
            );
        }
    }

    #[test]
    fn test_nw_gaussian_vs_epanechnikov() {
        let x = uniform_grid(30);
        let y: Vec<f64> = x
            .iter()
            .map(|&xi| (2.0 * std::f64::consts::PI * xi).sin())
            .collect();

        let y_gauss = nadaraya_watson(&x, &y, &x, 0.1, "gaussian");
        let y_epan = nadaraya_watson(&x, &y, &x, 0.1, "epanechnikov");

        // Both should produce valid output
        assert_eq!(y_gauss.len(), 30);
        assert_eq!(y_epan.len(), 30);

        // They should be different (different kernels)
        let diff: f64 = y_gauss
            .iter()
            .zip(&y_epan)
            .map(|(a, b)| (a - b).abs())
            .sum();
        assert!(
            diff > 0.0,
            "Different kernels should give different results"
        );
    }

    #[test]
    fn test_nw_invalid_input() {
        // Empty input
        let result = nadaraya_watson(&[], &[], &[0.5], 0.1, "gaussian");
        assert_eq!(result, vec![0.0]);

        // Mismatched lengths
        let result = nadaraya_watson(&[0.0, 1.0], &[1.0], &[0.5], 0.1, "gaussian");
        assert_eq!(result, vec![0.0]);

        // Zero bandwidth
        let result = nadaraya_watson(&[0.0, 1.0], &[1.0, 2.0], &[0.5], 0.0, "gaussian");
        assert_eq!(result, vec![0.0]);
    }

    // ============== Local linear tests ==============

    #[test]
    fn test_ll_constant_data() {
        let x = uniform_grid(20);
        let y: Vec<f64> = vec![3.0; 20];

        let y_smooth = local_linear(&x, &y, &x, 0.15, "gaussian");

        for &yi in &y_smooth {
            assert!((yi - 3.0).abs() < 0.1, "Constant should remain constant");
        }
    }

    #[test]
    fn test_ll_linear_data_exact() {
        let x = uniform_grid(30);
        let y: Vec<f64> = x.iter().map(|&xi| 3.0 * xi + 2.0).collect();

        let y_smooth = local_linear(&x, &y, &x, 0.2, "gaussian");

        // Local linear should fit linear data exactly (in interior)
        for i in 5..25 {
            let expected = 3.0 * x[i] + 2.0;
            assert!(
                (y_smooth[i] - expected).abs() < 0.1,
                "Local linear should fit linear data well"
            );
        }
    }

    #[test]
    fn test_ll_invalid_input() {
        let result = local_linear(&[], &[], &[0.5], 0.1, "gaussian");
        assert_eq!(result, vec![0.0]);

        let result = local_linear(&[0.0, 1.0], &[1.0, 2.0], &[0.5], -0.1, "gaussian");
        assert_eq!(result, vec![0.0]);
    }

    // ============== Local polynomial tests ==============

    #[test]
    fn test_lp_degree1_equals_local_linear() {
        let x = uniform_grid(25);
        let y: Vec<f64> = x.iter().map(|&xi| xi * xi).collect();

        let y_ll = local_linear(&x, &y, &x, 0.15, "gaussian");
        let y_lp = local_polynomial(&x, &y, &x, 0.15, 1, "gaussian");

        for i in 0..25 {
            assert!(
                (y_ll[i] - y_lp[i]).abs() < 1e-10,
                "Degree 1 should equal local linear"
            );
        }
    }

    #[test]
    fn test_lp_quadratic_data() {
        let x = uniform_grid(40);
        let y: Vec<f64> = x.iter().map(|&xi| xi * xi).collect();

        let y_smooth = local_polynomial(&x, &y, &x, 0.15, 2, "gaussian");

        // Local quadratic should fit quadratic data well in interior
        for i in 8..32 {
            let expected = x[i] * x[i];
            assert!(
                (y_smooth[i] - expected).abs() < 0.1,
                "Local quadratic should fit quadratic data"
            );
        }
    }

    #[test]
    fn test_lp_invalid_input() {
        // Zero degree delegates to Nadaraya-Watson (not zeros)
        let result = local_polynomial(&[0.0, 1.0], &[1.0, 2.0], &[0.5], 0.1, 0, "gaussian");
        let nw = nadaraya_watson(&[0.0, 1.0], &[1.0, 2.0], &[0.5], 0.1, "gaussian");
        assert_eq!(result, nw);

        // Empty input
        let result = local_polynomial(&[], &[], &[0.5], 0.1, 2, "gaussian");
        assert_eq!(result, vec![0.0]);
    }

    // ============== KNN smoother tests ==============

    #[test]
    fn test_knn_k1_nearest() {
        let x = vec![0.0, 0.5, 1.0];
        let y = vec![1.0, 2.0, 3.0];

        let result = knn_smoother(&x, &y, &[0.1, 0.6, 0.9], 1);

        // k=1 should return the nearest neighbor's y value
        assert!((result[0] - 1.0).abs() < 1e-10, "0.1 nearest to 0.0 -> 1.0");
        assert!((result[1] - 2.0).abs() < 1e-10, "0.6 nearest to 0.5 -> 2.0");
        assert!((result[2] - 3.0).abs() < 1e-10, "0.9 nearest to 1.0 -> 3.0");
    }

    #[test]
    fn test_knn_k_equals_n_is_mean() {
        let x = vec![0.0, 0.25, 0.5, 0.75, 1.0];
        let y = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let expected_mean = 3.0;

        let result = knn_smoother(&x, &y, &[0.5], 5);

        assert!(
            (result[0] - expected_mean).abs() < 1e-10,
            "k=n should return mean"
        );
    }

    #[test]
    fn test_knn_invalid_input() {
        let result = knn_smoother(&[], &[], &[0.5], 3);
        assert_eq!(result, vec![0.0]);

        let result = knn_smoother(&[0.0, 1.0], &[1.0, 2.0], &[0.5], 0);
        assert_eq!(result, vec![0.0]);
    }

    // ============== Smoothing matrix tests ==============

    #[test]
    fn test_smoothing_matrix_row_stochastic() {
        let x = uniform_grid(10);
        let s = smoothing_matrix_nw(&x, 0.2, "gaussian");

        assert_eq!(s.len(), 100);

        // Each row should sum to 1 (row stochastic)
        for i in 0..10 {
            let row_sum: f64 = (0..10).map(|j| s[i + j * 10]).sum();
            assert!(
                (row_sum - 1.0).abs() < 1e-10,
                "Row {} should sum to 1, got {}",
                i,
                row_sum
            );
        }
    }

    #[test]
    fn test_smoothing_matrix_invalid_input() {
        let result = smoothing_matrix_nw(&[], 0.1, "gaussian");
        assert!(result.is_empty());

        let result = smoothing_matrix_nw(&[0.0, 1.0], 0.0, "gaussian");
        assert!(result.is_empty());
    }

    #[test]
    fn test_nan_nw_no_panic() {
        let x = vec![0.0, 0.25, 0.5, 0.75, 1.0];
        let mut y = vec![0.0, 1.0, 2.0, 1.0, 0.0];
        y[2] = f64::NAN;
        let result = nadaraya_watson(&x, &y, &x, 0.3, "gaussian");
        assert_eq!(result.len(), x.len());
        // NaN should propagate but not panic
    }

    #[test]
    fn test_n1_smoother() {
        // Single data point
        let x = vec![0.5];
        let y = vec![3.0];
        let x_new = vec![0.5];
        let result = nadaraya_watson(&x, &y, &x_new, 0.3, "gaussian");
        assert_eq!(result.len(), 1);
        assert!(
            (result[0] - 3.0).abs() < 1e-6,
            "Single point smoother should return the value"
        );
    }

    #[test]
    fn test_duplicate_x_smoother() {
        // Duplicate x values
        let x = vec![0.0, 0.0, 0.5, 1.0, 1.0];
        let y = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let x_new = vec![0.0, 0.5, 1.0];
        let result = nadaraya_watson(&x, &y, &x_new, 0.3, "gaussian");
        assert_eq!(result.len(), 3);
        for v in &result {
            assert!(v.is_finite());
        }
    }

    // ============== CV smoother tests ==============

    #[test]
    fn test_cv_smoother_linear_data() {
        let x = uniform_grid(30);
        let y: Vec<f64> = x.iter().map(|&xi| 2.0 * xi + 1.0).collect();
        let cv = cv_smoother(&x, &y, 0.2, "gaussian");
        assert!(cv.is_finite());
        assert!(cv >= 0.0);
        assert!(cv < 1.0, "CV error for smooth linear data should be small");
    }

    #[test]
    fn test_cv_smoother_invalid() {
        assert_eq!(cv_smoother(&[], &[], 0.1, "gaussian"), f64::INFINITY);
        assert_eq!(
            cv_smoother(&[0.0, 1.0], &[1.0, 2.0], -0.1, "gaussian"),
            f64::INFINITY
        );
    }

    #[test]
    fn test_gcv_smoother_linear_data() {
        let x = uniform_grid(30);
        let y: Vec<f64> = x.iter().map(|&xi| 2.0 * xi + 1.0).collect();
        let gcv = gcv_smoother(&x, &y, 0.2, "gaussian");
        assert!(gcv.is_finite());
        assert!(gcv >= 0.0);
    }

    #[test]
    fn test_gcv_smoother_invalid() {
        assert_eq!(gcv_smoother(&[], &[], 0.1, "gaussian"), f64::INFINITY);
    }

    #[test]
    fn test_optim_bandwidth_returns_valid() {
        let x = uniform_grid(30);
        let y: Vec<f64> = x
            .iter()
            .map(|&xi| (2.0 * std::f64::consts::PI * xi).sin())
            .collect();

        let result = optim_bandwidth(&x, &y, None, CvCriterion::Gcv, "gaussian", 20);
        assert!(result.h_opt > 0.0);
        assert!(result.value.is_finite());
        assert_eq!(result.criterion, CvCriterion::Gcv);
    }

    #[test]
    fn test_optim_bandwidth_cv_vs_gcv() {
        let x = uniform_grid(25);
        let y: Vec<f64> = x.iter().map(|&xi| xi * xi).collect();

        let cv_result = optim_bandwidth(&x, &y, None, CvCriterion::Cv, "gaussian", 20);
        let gcv_result = optim_bandwidth(&x, &y, None, CvCriterion::Gcv, "gaussian", 20);

        assert!(cv_result.h_opt > 0.0);
        assert!(gcv_result.h_opt > 0.0);
    }

    #[test]
    fn test_optim_bandwidth_custom_range() {
        let x = uniform_grid(20);
        let y: Vec<f64> = x.to_vec();
        let result = optim_bandwidth(&x, &y, Some((0.05, 0.5)), CvCriterion::Cv, "gaussian", 10);
        assert!(result.h_opt >= 0.05);
        assert!(result.h_opt <= 0.5);
    }

    // ============== kNN CV tests ==============

    #[test]
    fn test_knn_gcv_returns_valid() {
        let x = uniform_grid(20);
        let y: Vec<f64> = x.iter().map(|&xi| xi * xi).collect();

        let result = knn_gcv(&x, &y, 10);
        assert!(result.optimal_k >= 1);
        assert!(result.optimal_k <= 10);
        assert_eq!(result.cv_errors.len(), 10);
        for &e in &result.cv_errors {
            assert!(e.is_finite());
            assert!(e >= 0.0);
        }
    }

    #[test]
    fn test_knn_gcv_constant_data() {
        let x = uniform_grid(15);
        let y = vec![5.0; 15];
        let result = knn_gcv(&x, &y, 5);
        // For constant data, error should be near zero for any k
        for &e in &result.cv_errors {
            assert!(e < 0.01, "Constant data: CV error should be near zero");
        }
    }

    #[test]
    fn test_knn_lcv_returns_valid() {
        let x = uniform_grid(15);
        let y: Vec<f64> = x.to_vec();

        let result = knn_lcv(&x, &y, 5);
        assert_eq!(result.len(), 15);
        for &k in &result {
            assert!(k >= 1);
            assert!(k <= 5);
        }
    }

    #[test]
    fn test_knn_lcv_constant_data() {
        let x = uniform_grid(10);
        let y = vec![3.0; 10];
        let result = knn_lcv(&x, &y, 5);
        assert_eq!(result.len(), 10);
        // For constant data, all k values should give zero error
        // so k=1 is optimal (first tested)
        for &k in &result {
            assert!(k >= 1);
        }
    }
}