1use crate::error::FdarError;
7use crate::slice_maybe_parallel;
8#[cfg(feature = "parallel")]
9use rayon::iter::ParallelIterator;
10
/// Standard normal (Gaussian) kernel: `exp(-u^2 / 2) / sqrt(2*pi)`.
fn gaussian_kernel(u: f64) -> f64 {
    let normalizer = (2.0 * std::f64::consts::PI).sqrt();
    f64::exp(-0.5 * u * u) / normalizer
}
15
/// Epanechnikov kernel: `0.75 * (1 - u^2)` on `|u| <= 1`, zero outside the support.
fn epanechnikov_kernel(u: f64) -> f64 {
    let inside_support = u.abs() <= 1.0;
    if inside_support {
        (1.0 - u * u) * 0.75
    } else {
        0.0
    }
}
24
/// Tricube kernel: `(1 - |u|^3)^3` on `|u| < 1`, zero outside the support.
fn tricube_kernel(u: f64) -> f64 {
    let t = u.abs();
    if t < 1.0 {
        let core = 1.0 - t.powi(3);
        core.powi(3)
    } else {
        0.0
    }
}
34
35fn get_kernel(kernel_type: &str) -> fn(f64) -> f64 {
37 match kernel_type.to_lowercase().as_str() {
38 "epanechnikov" | "epan" => epanechnikov_kernel,
39 "tricube" | "tri-cube" => tricube_kernel,
40 _ => gaussian_kernel,
41 }
42}
43
44pub fn nadaraya_watson(
73 x: &[f64],
74 y: &[f64],
75 x_new: &[f64],
76 bandwidth: f64,
77 kernel: &str,
78) -> Result<Vec<f64>, FdarError> {
79 let n = x.len();
80 if n == 0 {
81 return Err(FdarError::InvalidDimension {
82 parameter: "x",
83 expected: "non-empty slice".to_string(),
84 actual: "empty".to_string(),
85 });
86 }
87 if y.len() != n {
88 return Err(FdarError::InvalidDimension {
89 parameter: "y",
90 expected: format!("length {n} (matching x)"),
91 actual: format!("length {}", y.len()),
92 });
93 }
94 if x_new.is_empty() {
95 return Err(FdarError::InvalidDimension {
96 parameter: "x_new",
97 expected: "non-empty slice".to_string(),
98 actual: "empty".to_string(),
99 });
100 }
101 if bandwidth <= 0.0 {
102 return Err(FdarError::InvalidParameter {
103 parameter: "bandwidth",
104 message: format!("must be positive, got {bandwidth}"),
105 });
106 }
107
108 let kernel_fn = get_kernel(kernel);
109
110 Ok(slice_maybe_parallel!(x_new)
111 .map(|&x0| {
112 let mut num = 0.0;
113 let mut denom = 0.0;
114
115 for i in 0..n {
116 let u = (x[i] - x0) / bandwidth;
117 let w = kernel_fn(u);
118 num += w * y[i];
119 denom += w;
120 }
121
122 if denom > 1e-10 {
123 num / denom
124 } else {
125 0.0
126 }
127 })
128 .collect())
129}
130
131pub fn local_linear(
161 x: &[f64],
162 y: &[f64],
163 x_new: &[f64],
164 bandwidth: f64,
165 kernel: &str,
166) -> Result<Vec<f64>, FdarError> {
167 let n = x.len();
168 if n == 0 {
169 return Err(FdarError::InvalidDimension {
170 parameter: "x",
171 expected: "non-empty slice".to_string(),
172 actual: "empty".to_string(),
173 });
174 }
175 if y.len() != n {
176 return Err(FdarError::InvalidDimension {
177 parameter: "y",
178 expected: format!("length {n} (matching x)"),
179 actual: format!("length {}", y.len()),
180 });
181 }
182 if x_new.is_empty() {
183 return Err(FdarError::InvalidDimension {
184 parameter: "x_new",
185 expected: "non-empty slice".to_string(),
186 actual: "empty".to_string(),
187 });
188 }
189 if bandwidth <= 0.0 {
190 return Err(FdarError::InvalidParameter {
191 parameter: "bandwidth",
192 message: format!("must be positive, got {bandwidth}"),
193 });
194 }
195
196 let kernel_fn = get_kernel(kernel);
197
198 Ok(slice_maybe_parallel!(x_new)
199 .map(|&x0| {
200 let mut s0 = 0.0;
202 let mut s1 = 0.0;
203 let mut s2 = 0.0;
204 let mut t0 = 0.0;
205 let mut t1 = 0.0;
206
207 for i in 0..n {
208 let u = (x[i] - x0) / bandwidth;
209 let w = kernel_fn(u);
210 let d = x[i] - x0;
211
212 s0 += w;
213 s1 += w * d;
214 s2 += w * d * d;
215 t0 += w * y[i];
216 t1 += w * y[i] * d;
217 }
218
219 let det = s0 * s2 - s1 * s1;
221 if det.abs() > 1e-10 {
222 (s2 * t0 - s1 * t1) / det
223 } else if s0 > 1e-10 {
224 t0 / s0
225 } else {
226 0.0
227 }
228 })
229 .collect())
230}
231
/// Accumulate the kernel-weighted normal equations `X'WX` (row-major `p x p`)
/// and `X'Wy` (`p`) for a degree-(p-1) local polynomial fit centered at `x0`,
/// where column `j` of the design is `(x_i - x0)^j`.
///
/// Powers of `d = x_i - x0` are built incrementally per observation instead of
/// calling `powi(j)` inside the O(p^2) inner loops — the asymptotic cost drops
/// from O(n * p^2) power evaluations to O(n * p) multiplications. For j >= 4
/// the sequential products may differ from `powi` at ulp level (different
/// association order); the least-squares solution is insensitive to this.
fn accumulate_weighted_normal_equations(
    x: &[f64],
    y: &[f64],
    x0: f64,
    bandwidth: f64,
    p: usize,
    kernel_fn: impl Fn(f64) -> f64,
) -> (Vec<f64>, Vec<f64>) {
    let mut xtx = vec![0.0; p * p];
    let mut xty = vec![0.0; p];
    // powers[j] == d^j for the current observation; powers[0] stays 1.0.
    let mut powers = vec![1.0; p];

    for (&xi, &yi) in x.iter().zip(y.iter()) {
        let d = xi - x0;
        let w = kernel_fn(d / bandwidth);

        for j in 1..p {
            powers[j] = powers[j - 1] * d;
        }

        for j in 0..p {
            let w_dj = w * powers[j];
            for k in 0..p {
                xtx[j * p + k] += w_dj * powers[k];
            }
            xty[j] += w_dj * yi;
        }
    }

    (xtx, xty)
}
261
/// Partial-pivot search: index of the row (at or below `col`) with the
/// largest absolute value in column `col` of the row-major `p x p` matrix `a`.
fn find_pivot(a: &[f64], p: usize, col: usize) -> usize {
    let mut best_row = col;
    let mut best_abs = a[col * p + col].abs();
    for row in (col + 1)..p {
        let candidate = a[row * p + col].abs();
        if candidate > best_abs {
            best_row = row;
            best_abs = candidate;
        }
    }
    best_row
}
275
/// Exchange rows `row_a` and `row_b` of the row-major `p x p` matrix `a`,
/// mirroring the swap in the right-hand-side vector `b`.
fn swap_rows(a: &mut [f64], b: &mut [f64], p: usize, row_a: usize, row_b: usize) {
    let (base_a, base_b) = (row_a * p, row_b * p);
    for k in 0..p {
        a.swap(base_a + k, base_b + k);
    }
    b.swap(row_a, row_b);
}
283
/// Zero out column `pivot_row` in every row below `pivot_row` by subtracting
/// the scaled pivot row; the right-hand side `b` receives the same updates.
fn eliminate_below(a: &mut [f64], b: &mut [f64], p: usize, pivot_row: usize) {
    let pivot = a[pivot_row * p + pivot_row];
    for row in (pivot_row + 1)..p {
        let factor = a[row * p + pivot_row] / pivot;
        // RHS update is independent of the matrix update below.
        b[row] -= factor * b[pivot_row];
        for col in pivot_row..p {
            a[row * p + col] -= factor * a[pivot_row * p + col];
        }
    }
}
295
296fn forward_eliminate(a: &mut [f64], b: &mut [f64], p: usize) {
297 for i in 0..p {
298 let max_idx = find_pivot(a, p, i);
299 if max_idx != i {
300 swap_rows(a, b, p, i, max_idx);
301 }
302
303 if a[i * p + i].abs() < 1e-10 {
304 continue;
305 }
306
307 eliminate_below(a, b, p, i);
308 }
309}
310
/// Back-substitution on an upper-triangular system. Unknowns whose diagonal
/// entry is numerically zero are left at 0.0.
fn back_substitute(a: &[f64], b: &[f64], p: usize) -> Vec<f64> {
    let mut coefs = vec![0.0; p];
    for row in (0..p).rev() {
        // Subtract the already-solved trailing terms from the RHS.
        let mut acc = b[row];
        for col in (row + 1)..p {
            acc -= a[row * p + col] * coefs[col];
        }
        let diag = a[row * p + row];
        if diag.abs() > 1e-10 {
            coefs[row] = acc / diag;
        }
    }
    coefs
}
325
326fn solve_gaussian(a: &mut [f64], b: &mut [f64], p: usize) -> Vec<f64> {
327 forward_eliminate(a, b, p);
328 back_substitute(a, b, p)
329}
330
/// Public wrapper around the crate-internal Gaussian-elimination solver:
/// solves the row-major `p x p` system `a * x = b` with partial pivoting.
/// Note that `a` and `b` are modified in place during elimination.
pub fn solve_gaussian_pub(a: &mut [f64], b: &mut [f64], p: usize) -> Vec<f64> {
    solve_gaussian(a, b, p)
}
339
340pub fn local_polynomial(
358 x: &[f64],
359 y: &[f64],
360 x_new: &[f64],
361 bandwidth: f64,
362 degree: usize,
363 kernel: &str,
364) -> Result<Vec<f64>, FdarError> {
365 let n = x.len();
366 if n == 0 {
367 return Err(FdarError::InvalidDimension {
368 parameter: "x",
369 expected: "non-empty slice".to_string(),
370 actual: "empty".to_string(),
371 });
372 }
373 if y.len() != n {
374 return Err(FdarError::InvalidDimension {
375 parameter: "y",
376 expected: format!("length {n} (matching x)"),
377 actual: format!("length {}", y.len()),
378 });
379 }
380 if x_new.is_empty() {
381 return Err(FdarError::InvalidDimension {
382 parameter: "x_new",
383 expected: "non-empty slice".to_string(),
384 actual: "empty".to_string(),
385 });
386 }
387 if bandwidth <= 0.0 {
388 return Err(FdarError::InvalidParameter {
389 parameter: "bandwidth",
390 message: format!("must be positive, got {bandwidth}"),
391 });
392 }
393 if degree == 0 {
394 return nadaraya_watson(x, y, x_new, bandwidth, kernel);
395 }
396
397 if degree == 1 {
398 return local_linear(x, y, x_new, bandwidth, kernel);
399 }
400
401 let kernel_fn = get_kernel(kernel);
402 let p = degree + 1; Ok(slice_maybe_parallel!(x_new)
405 .map(|&x0| {
406 let (mut xtx, mut xty) =
407 accumulate_weighted_normal_equations(x, y, x0, bandwidth, p, kernel_fn);
408 let coefs = solve_gaussian(&mut xtx, &mut xty, p);
409 coefs[0]
410 })
411 .collect())
412}
413
414pub fn knn_smoother(x: &[f64], y: &[f64], x_new: &[f64], k: usize) -> Result<Vec<f64>, FdarError> {
430 let n = x.len();
431 if n == 0 {
432 return Err(FdarError::InvalidDimension {
433 parameter: "x",
434 expected: "non-empty slice".to_string(),
435 actual: "empty".to_string(),
436 });
437 }
438 if y.len() != n {
439 return Err(FdarError::InvalidDimension {
440 parameter: "y",
441 expected: format!("length {n} (matching x)"),
442 actual: format!("length {}", y.len()),
443 });
444 }
445 if x_new.is_empty() {
446 return Err(FdarError::InvalidDimension {
447 parameter: "x_new",
448 expected: "non-empty slice".to_string(),
449 actual: "empty".to_string(),
450 });
451 }
452 if k == 0 {
453 return Err(FdarError::InvalidParameter {
454 parameter: "k",
455 message: "must be at least 1".to_string(),
456 });
457 }
458
459 let k = k.min(n);
460
461 Ok(slice_maybe_parallel!(x_new)
462 .map(|&x0| {
463 let mut distances: Vec<(usize, f64)> = x
465 .iter()
466 .enumerate()
467 .map(|(i, &xi)| (i, (xi - x0).abs()))
468 .collect();
469
470 distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
472
473 let sum: f64 = distances.iter().take(k).map(|(i, _)| y[*i]).sum();
475 sum / k as f64
476 })
477 .collect())
478}
479
480pub fn smoothing_matrix_nw(x: &[f64], bandwidth: f64, kernel: &str) -> Result<Vec<f64>, FdarError> {
488 let n = x.len();
489 if n == 0 {
490 return Err(FdarError::InvalidDimension {
491 parameter: "x",
492 expected: "non-empty slice".to_string(),
493 actual: "empty".to_string(),
494 });
495 }
496 if bandwidth <= 0.0 {
497 return Err(FdarError::InvalidParameter {
498 parameter: "bandwidth",
499 message: format!("must be positive, got {bandwidth}"),
500 });
501 }
502
503 let kernel_fn = get_kernel(kernel);
504 let mut s = vec![0.0; n * n];
505
506 for i in 0..n {
507 let mut row_sum = 0.0;
508 for j in 0..n {
509 let u = (x[j] - x[i]) / bandwidth;
510 s[i + j * n] = kernel_fn(u);
511 row_sum += s[i + j * n];
512 }
513 if row_sum > 1e-10 {
514 for j in 0..n {
515 s[i + j * n] /= row_sum;
516 }
517 }
518 }
519
520 Ok(s)
521}
522
/// Bandwidth-selection criterion used by [`optim_bandwidth`].
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum CvCriterion {
    /// Leave-one-out cross-validation (scored by [`cv_smoother`]).
    Cv,
    /// Generalized cross-validation (scored by [`gcv_smoother`]).
    Gcv,
}
533
/// Outcome of a grid search over candidate bandwidths (see [`optim_bandwidth`]).
#[derive(Debug, Clone)]
pub struct OptimBandwidthResult {
    /// Bandwidth that achieved the lowest criterion score on the grid.
    pub h_opt: f64,
    /// Criterion that was minimized.
    pub criterion: CvCriterion,
    /// Criterion value attained at `h_opt`.
    pub value: f64,
}
544
545pub fn cv_smoother(x: &[f64], y: &[f64], bandwidth: f64, kernel: &str) -> f64 {
559 let n = x.len();
560 if n < 2 || y.len() != n || bandwidth <= 0.0 {
561 return f64::INFINITY;
562 }
563
564 let mut s = match smoothing_matrix_nw(x, bandwidth, kernel) {
566 Ok(s) => s,
567 Err(_) => return f64::INFINITY,
568 };
569
570 for i in 0..n {
572 s[i + i * n] = 0.0;
573 }
574
575 for i in 0..n {
577 let row_sum: f64 = (0..n).map(|j| s[i + j * n]).sum();
578 if row_sum > 1e-10 {
579 for j in 0..n {
580 s[i + j * n] /= row_sum;
581 }
582 }
583 }
584
585 let mut mse = 0.0;
587 for i in 0..n {
588 let y_hat: f64 = (0..n).map(|j| s[i + j * n] * y[j]).sum();
589 let resid = y[i] - y_hat;
590 mse += resid * resid;
591 }
592 mse / n as f64
593}
594
595pub fn gcv_smoother(x: &[f64], y: &[f64], bandwidth: f64, kernel: &str) -> f64 {
608 let n = x.len();
609 if n < 2 || y.len() != n || bandwidth <= 0.0 {
610 return f64::INFINITY;
611 }
612
613 let s = match smoothing_matrix_nw(x, bandwidth, kernel) {
614 Ok(s) => s,
615 Err(_) => return f64::INFINITY,
616 };
617
618 let mut rss = 0.0;
620 for i in 0..n {
621 let y_hat: f64 = (0..n).map(|j| s[i + j * n] * y[j]).sum();
622 let resid = y[i] - y_hat;
623 rss += resid * resid;
624 }
625
626 let trace_s: f64 = (0..n).map(|i| s[i + i * n]).sum();
628
629 let denom = 1.0 - trace_s / n as f64;
630 if denom.abs() < 1e-10 {
631 f64::INFINITY
632 } else {
633 (rss / n as f64) / (denom * denom)
634 }
635}
636
637pub fn optim_bandwidth(
663 x: &[f64],
664 y: &[f64],
665 h_range: Option<(f64, f64)>,
666 criterion: CvCriterion,
667 kernel: &str,
668 n_grid: usize,
669) -> OptimBandwidthResult {
670 let n = x.len();
671 let n_grid = n_grid.max(2);
672
673 let (h_min, h_max) = match h_range {
675 Some((lo, hi)) if lo > 0.0 && hi > lo => (lo, hi),
676 _ => {
677 let x_min = x.iter().copied().fold(f64::INFINITY, f64::min);
678 let x_max = x.iter().copied().fold(f64::NEG_INFINITY, f64::max);
679 let h_default = (x_max - x_min) / (n as f64).powf(0.2);
680 let h_default = h_default.max(1e-10);
681 (h_default / 5.0, h_default * 5.0)
682 }
683 };
684
685 let score_fn = match criterion {
686 CvCriterion::Cv => cv_smoother,
687 CvCriterion::Gcv => gcv_smoother,
688 };
689
690 let mut best_h = h_min;
691 let mut best_score = f64::INFINITY;
692
693 for i in 0..n_grid {
694 let h = h_min + (h_max - h_min) * i as f64 / (n_grid - 1) as f64;
695 let score = score_fn(x, y, h, kernel);
696 if score < best_score {
697 best_score = score;
698 best_h = h;
699 }
700 }
701
702 OptimBandwidthResult {
703 h_opt: best_h,
704 criterion,
705 value: best_score,
706 }
707}
708
/// Outcome of leave-one-out cross-validation over k-NN neighbourhood sizes
/// (see [`knn_gcv`]).
#[derive(Debug, Clone)]
pub struct KnnCvResult {
    /// Neighbourhood size with the smallest cross-validation error.
    pub optimal_k: usize,
    /// Mean squared LOO error for each candidate `k` (entry `k - 1`).
    pub cv_errors: Vec<f64>,
}
719
720pub fn knn_gcv(x: &[f64], y: &[f64], max_k: usize) -> KnnCvResult {
730 let n = x.len();
731 let max_k = max_k.min(n.saturating_sub(1)).max(1);
732
733 let mut sorted_neighbors: Vec<Vec<(usize, f64)>> = Vec::with_capacity(n);
735 for i in 0..n {
736 let mut dists: Vec<(usize, f64)> = (0..n)
737 .filter(|&j| j != i)
738 .map(|j| (j, (x[j] - x[i]).abs()))
739 .collect();
740 dists.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
741 sorted_neighbors.push(dists);
742 }
743
744 let mut cv_errors = Vec::with_capacity(max_k);
745
746 for k in 1..=max_k {
747 let mut mse = 0.0;
748 for i in 0..n {
749 let neighbors = &sorted_neighbors[i];
750 let d_k = if k <= neighbors.len() {
752 neighbors[k - 1].1
753 } else {
754 neighbors.last().map_or(1.0, |x| x.1)
755 };
756 let d_k1 = if k < neighbors.len() {
757 neighbors[k].1
758 } else {
759 d_k * 2.0
760 };
761 let h = (d_k + d_k1) / 2.0;
762 let h = h.max(1e-10);
763
764 let mut num = 0.0;
766 let mut den = 0.0;
767 for &(j, dist) in neighbors.iter().take(k) {
768 let u = dist / h;
769 let w = epanechnikov_kernel(u);
770 num += w * y[j];
771 den += w;
772 }
773 let y_hat = if den > 1e-10 { num / den } else { y[i] };
774 mse += (y[i] - y_hat).powi(2);
775 }
776 cv_errors.push(mse / n as f64);
777 }
778
779 let optimal_k = cv_errors
780 .iter()
781 .enumerate()
782 .min_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
783 .map_or(1, |(i, _)| i + 1);
784
785 KnnCvResult {
786 optimal_k,
787 cv_errors,
788 }
789}
790
/// Per-observation neighbourhood sizes chosen by local (pointwise) leave-one-
/// out validation: for each observation `i`, returns the `k` in `1..=max_k`
/// whose k-NN mean (excluding `i` itself) has the smallest absolute error
/// against `y[i]`. `max_k` is clamped to `[1, n-1]`.
pub fn knn_lcv(x: &[f64], y: &[f64], max_k: usize) -> Vec<usize> {
    let n = x.len();
    let max_k = max_k.min(n.saturating_sub(1)).max(1);

    let mut per_obs_k = vec![1usize; n];

    for i in 0..n {
        let mut neighbors: Vec<(usize, f64)> = (0..n)
            .filter(|&j| j != i)
            .map(|j| (j, (x[j] - x[i]).abs()))
            .collect();
        // total_cmp is a genuine total order, so the sort stays well defined
        // even if x contains NaN (partial_cmp + unwrap_or(Equal) can feed the
        // sort an inconsistent comparator). Identical order for non-NaN data.
        neighbors.sort_by(|a, b| a.1.total_cmp(&b.1));

        let mut best_k = 1;
        let mut best_err = f64::INFINITY;

        for k in 1..=max_k {
            // Unweighted mean of the k nearest responses.
            let sum: f64 = neighbors.iter().take(k).map(|&(j, _)| y[j]).sum();
            let y_hat = sum / k as f64;
            let err = (y[i] - y_hat).abs();
            // Strict `<` keeps the smallest k on ties.
            if err < best_err {
                best_err = err;
                best_k = k;
            }
        }
        per_obs_k[i] = best_k;
    }

    per_obs_k
}
835
#[cfg(test)]
mod tests {
    //! Unit tests for the smoothers, smoothing matrices, and bandwidth /
    //! neighbourhood-size selectors defined in this module.
    //! `uniform_grid(n)` comes from the crate's test helpers and is assumed
    //! to produce `n` evenly spaced abscissae — confirm range in
    //! `test_helpers` if asserting boundary behavior.
    use super::*;
    use crate::test_helpers::uniform_grid;

    // --- Nadaraya-Watson ---

    #[test]
    fn test_nw_constant_data() {
        let x = uniform_grid(20);
        let y: Vec<f64> = vec![5.0; 20];

        let y_smooth = nadaraya_watson(&x, &y, &x, 0.1, "gaussian").unwrap();

        for &yi in &y_smooth {
            assert!(
                (yi - 5.0).abs() < 0.1,
                "Constant data should remain constant"
            );
        }
    }

    #[test]
    fn test_nw_linear_data() {
        let x = uniform_grid(50);
        let y: Vec<f64> = x.iter().map(|&xi| 2.0 * xi + 1.0).collect();

        let y_smooth = nadaraya_watson(&x, &y, &x, 0.2, "gaussian").unwrap();

        // Interior points only — boundary bias is expected at the edges.
        for i in 10..40 {
            let expected = 2.0 * x[i] + 1.0;
            assert!(
                (y_smooth[i] - expected).abs() < 0.2,
                "Linear trend should be approximately preserved"
            );
        }
    }

    #[test]
    fn test_nw_gaussian_vs_epanechnikov() {
        let x = uniform_grid(30);
        let y: Vec<f64> = x
            .iter()
            .map(|&xi| (2.0 * std::f64::consts::PI * xi).sin())
            .collect();

        let y_gauss = nadaraya_watson(&x, &y, &x, 0.1, "gaussian").unwrap();
        let y_epan = nadaraya_watson(&x, &y, &x, 0.1, "epanechnikov").unwrap();

        assert_eq!(y_gauss.len(), 30);
        assert_eq!(y_epan.len(), 30);

        let diff: f64 = y_gauss
            .iter()
            .zip(&y_epan)
            .map(|(a, b)| (a - b).abs())
            .sum();
        assert!(
            diff > 0.0,
            "Different kernels should give different results"
        );
    }

    #[test]
    fn test_nw_invalid_input() {
        assert!(nadaraya_watson(&[], &[], &[0.5], 0.1, "gaussian").is_err());

        assert!(nadaraya_watson(&[0.0, 1.0], &[1.0], &[0.5], 0.1, "gaussian").is_err());

        assert!(nadaraya_watson(&[0.0, 1.0], &[1.0, 2.0], &[0.5], 0.0, "gaussian").is_err());

        assert!(nadaraya_watson(&[0.0, 1.0], &[1.0, 2.0], &[], 0.1, "gaussian").is_err());
    }

    // --- Local linear ---

    #[test]
    fn test_ll_constant_data() {
        let x = uniform_grid(20);
        let y: Vec<f64> = vec![3.0; 20];

        let y_smooth = local_linear(&x, &y, &x, 0.15, "gaussian").unwrap();

        for &yi in &y_smooth {
            assert!((yi - 3.0).abs() < 0.1, "Constant should remain constant");
        }
    }

    #[test]
    fn test_ll_linear_data_exact() {
        let x = uniform_grid(30);
        let y: Vec<f64> = x.iter().map(|&xi| 3.0 * xi + 2.0).collect();

        let y_smooth = local_linear(&x, &y, &x, 0.2, "gaussian").unwrap();

        // Local linear reproduces linear trends without boundary bias.
        for i in 5..25 {
            let expected = 3.0 * x[i] + 2.0;
            assert!(
                (y_smooth[i] - expected).abs() < 0.1,
                "Local linear should fit linear data well"
            );
        }
    }

    #[test]
    fn test_ll_invalid_input() {
        assert!(local_linear(&[], &[], &[0.5], 0.1, "gaussian").is_err());

        assert!(local_linear(&[0.0, 1.0], &[1.0, 2.0], &[0.5], -0.1, "gaussian").is_err());
    }

    // --- Local polynomial ---

    #[test]
    fn test_lp_degree1_equals_local_linear() {
        let x = uniform_grid(25);
        let y: Vec<f64> = x.iter().map(|&xi| xi * xi).collect();

        let y_ll = local_linear(&x, &y, &x, 0.15, "gaussian").unwrap();
        let y_lp = local_polynomial(&x, &y, &x, 0.15, 1, "gaussian").unwrap();

        for i in 0..25 {
            assert!(
                (y_ll[i] - y_lp[i]).abs() < 1e-10,
                "Degree 1 should equal local linear"
            );
        }
    }

    #[test]
    fn test_lp_quadratic_data() {
        let x = uniform_grid(40);
        let y: Vec<f64> = x.iter().map(|&xi| xi * xi).collect();

        let y_smooth = local_polynomial(&x, &y, &x, 0.15, 2, "gaussian").unwrap();

        // Interior points only — boundary effects at the edges.
        for i in 8..32 {
            let expected = x[i] * x[i];
            assert!(
                (y_smooth[i] - expected).abs() < 0.1,
                "Local quadratic should fit quadratic data"
            );
        }
    }

    #[test]
    fn test_lp_invalid_input() {
        // Degree 0 delegates to Nadaraya-Watson and must match it exactly.
        let result =
            local_polynomial(&[0.0, 1.0], &[1.0, 2.0], &[0.5], 0.1, 0, "gaussian").unwrap();
        let nw = nadaraya_watson(&[0.0, 1.0], &[1.0, 2.0], &[0.5], 0.1, "gaussian").unwrap();
        assert_eq!(result, nw);

        assert!(local_polynomial(&[], &[], &[0.5], 0.1, 2, "gaussian").is_err());
    }

    // --- k-NN smoother ---

    #[test]
    fn test_knn_k1_nearest() {
        let x = vec![0.0, 0.5, 1.0];
        let y = vec![1.0, 2.0, 3.0];

        let result = knn_smoother(&x, &y, &[0.1, 0.6, 0.9], 1).unwrap();

        assert!((result[0] - 1.0).abs() < 1e-10, "0.1 nearest to 0.0 -> 1.0");
        assert!((result[1] - 2.0).abs() < 1e-10, "0.6 nearest to 0.5 -> 2.0");
        assert!((result[2] - 3.0).abs() < 1e-10, "0.9 nearest to 1.0 -> 3.0");
    }

    #[test]
    fn test_knn_k_equals_n_is_mean() {
        let x = vec![0.0, 0.25, 0.5, 0.75, 1.0];
        let y = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let expected_mean = 3.0;

        let result = knn_smoother(&x, &y, &[0.5], 5).unwrap();

        assert!(
            (result[0] - expected_mean).abs() < 1e-10,
            "k=n should return mean"
        );
    }

    #[test]
    fn test_knn_invalid_input() {
        assert!(knn_smoother(&[], &[], &[0.5], 3).is_err());

        assert!(knn_smoother(&[0.0, 1.0], &[1.0, 2.0], &[0.5], 0).is_err());
    }

    // --- Smoothing matrix ---

    #[test]
    fn test_smoothing_matrix_row_stochastic() {
        let x = uniform_grid(10);
        let s = smoothing_matrix_nw(&x, 0.2, "gaussian").unwrap();

        assert_eq!(s.len(), 100);

        // Entry (i, j) lives at s[i + j * n] (column-major).
        for i in 0..10 {
            let row_sum: f64 = (0..10).map(|j| s[i + j * 10]).sum();
            assert!(
                (row_sum - 1.0).abs() < 1e-10,
                "Row {} should sum to 1, got {}",
                i,
                row_sum
            );
        }
    }

    #[test]
    fn test_smoothing_matrix_invalid_input() {
        assert!(smoothing_matrix_nw(&[], 0.1, "gaussian").is_err());

        assert!(smoothing_matrix_nw(&[0.0, 1.0], 0.0, "gaussian").is_err());
    }

    // --- Degenerate / awkward inputs ---

    #[test]
    fn test_nan_nw_no_panic() {
        let x = vec![0.0, 0.25, 0.5, 0.75, 1.0];
        let mut y = vec![0.0, 1.0, 2.0, 1.0, 0.0];
        y[2] = f64::NAN;
        let result = nadaraya_watson(&x, &y, &x, 0.3, "gaussian").unwrap();
        assert_eq!(result.len(), x.len());
    }

    #[test]
    fn test_n1_smoother() {
        // A single observation: the smoother degenerates to that value.
        let x = vec![0.5];
        let y = vec![3.0];
        let x_new = vec![0.5];
        let result = nadaraya_watson(&x, &y, &x_new, 0.3, "gaussian").unwrap();
        assert_eq!(result.len(), 1);
        assert!(
            (result[0] - 3.0).abs() < 1e-6,
            "Single point smoother should return the value"
        );
    }

    #[test]
    fn test_duplicate_x_smoother() {
        // Repeated abscissae must not produce NaN/inf fitted values.
        let x = vec![0.0, 0.0, 0.5, 1.0, 1.0];
        let y = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let x_new = vec![0.0, 0.5, 1.0];
        let result = nadaraya_watson(&x, &y, &x_new, 0.3, "gaussian").unwrap();
        assert_eq!(result.len(), 3);
        for v in &result {
            assert!(v.is_finite());
        }
    }

    // --- CV / GCV scores and bandwidth optimization ---

    #[test]
    fn test_cv_smoother_linear_data() {
        let x = uniform_grid(30);
        let y: Vec<f64> = x.iter().map(|&xi| 2.0 * xi + 1.0).collect();
        let cv = cv_smoother(&x, &y, 0.2, "gaussian");
        assert!(cv.is_finite());
        assert!(cv >= 0.0);
        assert!(cv < 1.0, "CV error for smooth linear data should be small");
    }

    #[test]
    fn test_cv_smoother_invalid() {
        assert_eq!(cv_smoother(&[], &[], 0.1, "gaussian"), f64::INFINITY);
        assert_eq!(
            cv_smoother(&[0.0, 1.0], &[1.0, 2.0], -0.1, "gaussian"),
            f64::INFINITY
        );
    }

    #[test]
    fn test_gcv_smoother_linear_data() {
        let x = uniform_grid(30);
        let y: Vec<f64> = x.iter().map(|&xi| 2.0 * xi + 1.0).collect();
        let gcv = gcv_smoother(&x, &y, 0.2, "gaussian");
        assert!(gcv.is_finite());
        assert!(gcv >= 0.0);
    }

    #[test]
    fn test_gcv_smoother_invalid() {
        assert_eq!(gcv_smoother(&[], &[], 0.1, "gaussian"), f64::INFINITY);
    }

    #[test]
    fn test_optim_bandwidth_returns_valid() {
        let x = uniform_grid(30);
        let y: Vec<f64> = x
            .iter()
            .map(|&xi| (2.0 * std::f64::consts::PI * xi).sin())
            .collect();

        let result = optim_bandwidth(&x, &y, None, CvCriterion::Gcv, "gaussian", 20);
        assert!(result.h_opt > 0.0);
        assert!(result.value.is_finite());
        assert_eq!(result.criterion, CvCriterion::Gcv);
    }

    #[test]
    fn test_optim_bandwidth_cv_vs_gcv() {
        let x = uniform_grid(25);
        let y: Vec<f64> = x.iter().map(|&xi| xi * xi).collect();

        let cv_result = optim_bandwidth(&x, &y, None, CvCriterion::Cv, "gaussian", 20);
        let gcv_result = optim_bandwidth(&x, &y, None, CvCriterion::Gcv, "gaussian", 20);

        assert!(cv_result.h_opt > 0.0);
        assert!(gcv_result.h_opt > 0.0);
    }

    #[test]
    fn test_optim_bandwidth_custom_range() {
        let x = uniform_grid(20);
        let y: Vec<f64> = x.to_vec();
        let result = optim_bandwidth(&x, &y, Some((0.05, 0.5)), CvCriterion::Cv, "gaussian", 10);
        assert!(result.h_opt >= 0.05);
        assert!(result.h_opt <= 0.5);
    }

    // --- k-NN model selection ---

    #[test]
    fn test_knn_gcv_returns_valid() {
        let x = uniform_grid(20);
        let y: Vec<f64> = x.iter().map(|&xi| xi * xi).collect();

        let result = knn_gcv(&x, &y, 10);
        assert!(result.optimal_k >= 1);
        assert!(result.optimal_k <= 10);
        assert_eq!(result.cv_errors.len(), 10);
        for &e in &result.cv_errors {
            assert!(e.is_finite());
            assert!(e >= 0.0);
        }
    }

    #[test]
    fn test_knn_gcv_constant_data() {
        let x = uniform_grid(15);
        let y = vec![5.0; 15];
        let result = knn_gcv(&x, &y, 5);
        // Any k predicts a constant perfectly.
        for &e in &result.cv_errors {
            assert!(e < 0.01, "Constant data: CV error should be near zero");
        }
    }

    #[test]
    fn test_knn_lcv_returns_valid() {
        let x = uniform_grid(15);
        let y: Vec<f64> = x.to_vec();

        let result = knn_lcv(&x, &y, 5);
        assert_eq!(result.len(), 15);
        for &k in &result {
            assert!(k >= 1);
            assert!(k <= 5);
        }
    }

    #[test]
    fn test_knn_lcv_constant_data() {
        let x = uniform_grid(10);
        let y = vec![3.0; 10];
        let result = knn_lcv(&x, &y, 5);
        assert_eq!(result.len(), 10);
        // All k give zero error on constant data; any valid k is acceptable.
        for &k in &result {
            assert!(k >= 1);
        }
    }

    // --- Tricube kernel coverage ---

    #[test]
    fn test_tricube_kernel_values() {
        // Peak value at the origin.
        let k0 = tricube_kernel(0.0);
        assert!((k0 - 1.0).abs() < 1e-10, "tricube(0) should be 1.0");

        assert_eq!(tricube_kernel(1.0), 0.0, "tricube(1) should be 0");
        assert_eq!(tricube_kernel(-1.0), 0.0, "tricube(-1) should be 0");
        assert_eq!(tricube_kernel(2.0), 0.0, "tricube(2) should be 0");

        // Strictly between the extremes inside the support.
        let k05 = tricube_kernel(0.5);
        assert!(k05 > 0.0 && k05 < 1.0, "tricube(0.5) should be in (0, 1)");
    }

    #[test]
    fn test_nw_tricube_constant_data() {
        let x = uniform_grid(20);
        let y = vec![5.0; 20];

        let y_smooth = nadaraya_watson(&x, &y, &x, 0.2, "tricube").unwrap();

        for &yi in &y_smooth {
            assert!(
                (yi - 5.0).abs() < 0.1,
                "Tricube NW: constant data should remain constant"
            );
        }
    }

    #[test]
    fn test_nw_tricube_linear_data() {
        let x = uniform_grid(50);
        let y: Vec<f64> = x.iter().map(|&xi| 2.0 * xi + 1.0).collect();

        let y_smooth = nadaraya_watson(&x, &y, &x, 0.2, "tricube").unwrap();

        // Interior points only — boundary bias is expected at the edges.
        for i in 10..40 {
            let expected = 2.0 * x[i] + 1.0;
            assert!(
                (y_smooth[i] - expected).abs() < 0.3,
                "Tricube NW: linear trend should be approximately preserved at i={i}"
            );
        }
    }

    #[test]
    fn test_nw_tricube_vs_gaussian() {
        let x = uniform_grid(30);
        let y: Vec<f64> = x
            .iter()
            .map(|&xi| (2.0 * std::f64::consts::PI * xi).sin())
            .collect();

        let y_gauss = nadaraya_watson(&x, &y, &x, 0.15, "gaussian").unwrap();
        let y_tri = nadaraya_watson(&x, &y, &x, 0.15, "tricube").unwrap();

        assert_eq!(y_gauss.len(), y_tri.len());

        assert!(y_tri.iter().all(|v| v.is_finite()));

        let diff: f64 = y_gauss.iter().zip(&y_tri).map(|(a, b)| (a - b).abs()).sum();
        assert!(
            diff > 0.0,
            "Gaussian and tricube kernels should give different results"
        );
    }

    #[test]
    fn test_nw_tricube_vs_epanechnikov() {
        let x = uniform_grid(30);
        let y: Vec<f64> = x
            .iter()
            .map(|&xi| (2.0 * std::f64::consts::PI * xi).sin())
            .collect();

        let y_epan = nadaraya_watson(&x, &y, &x, 0.15, "epanechnikov").unwrap();
        let y_tri = nadaraya_watson(&x, &y, &x, 0.15, "tricube").unwrap();

        assert!(y_epan.iter().all(|v| v.is_finite()));
        assert!(y_tri.iter().all(|v| v.is_finite()));

        let diff: f64 = y_epan.iter().zip(&y_tri).map(|(a, b)| (a - b).abs()).sum();
        assert!(
            diff > 0.0,
            "Epanechnikov and tricube should give different results"
        );
    }

    #[test]
    fn test_ll_tricube_constant_data() {
        let x = uniform_grid(20);
        let y = vec![3.0; 20];

        let y_smooth = local_linear(&x, &y, &x, 0.2, "tricube").unwrap();

        for &yi in &y_smooth {
            assert!(
                (yi - 3.0).abs() < 0.1,
                "Tricube LL: constant should remain constant"
            );
        }
    }

    #[test]
    fn test_ll_tricube_linear_data() {
        let x = uniform_grid(30);
        let y: Vec<f64> = x.iter().map(|&xi| 3.0 * xi + 2.0).collect();

        let y_smooth = local_linear(&x, &y, &x, 0.2, "tricube").unwrap();

        // Interior points only.
        for i in 5..25 {
            let expected = 3.0 * x[i] + 2.0;
            assert!(
                (y_smooth[i] - expected).abs() < 0.2,
                "Tricube LL: should fit linear data well at i={i}"
            );
        }
    }

    #[test]
    fn test_ll_tricube_vs_gaussian() {
        let x = uniform_grid(30);
        let y: Vec<f64> = x.iter().map(|&xi| xi * xi).collect();

        let y_gauss = local_linear(&x, &y, &x, 0.15, "gaussian").unwrap();
        let y_tri = local_linear(&x, &y, &x, 0.15, "tricube").unwrap();

        assert_eq!(y_gauss.len(), y_tri.len());
        assert!(y_tri.iter().all(|v| v.is_finite()));

        let diff: f64 = y_gauss.iter().zip(&y_tri).map(|(a, b)| (a - b).abs()).sum();
        assert!(
            diff > 0.0,
            "Gaussian and tricube local linear should differ"
        );
    }

    #[test]
    fn test_lp_tricube_quadratic() {
        let x = uniform_grid(40);
        let y: Vec<f64> = x.iter().map(|&xi| xi * xi).collect();

        let y_smooth = local_polynomial(&x, &y, &x, 0.15, 2, "tricube").unwrap();

        // Interior points only.
        for i in 8..32 {
            let expected = x[i] * x[i];
            assert!(
                (y_smooth[i] - expected).abs() < 0.15,
                "Tricube LP: should fit quadratic data at i={i}"
            );
        }
    }

    #[test]
    fn test_get_kernel_tricube_aliases() {
        // "tricube" and "tri-cube" resolve to the same kernel.
        let k1 = get_kernel("tricube");
        let k2 = get_kernel("tri-cube");

        let test_val = 0.5;
        assert!(
            (k1(test_val) - k2(test_val)).abs() < 1e-15,
            "Both tricube aliases should give the same result"
        );
    }

    #[test]
    fn test_smoothing_matrix_tricube() {
        let x = uniform_grid(10);
        let s = smoothing_matrix_nw(&x, 0.2, "tricube").unwrap();

        assert_eq!(s.len(), 100);

        // Rows of the smoothing matrix remain stochastic for compact kernels.
        for i in 0..10 {
            let row_sum: f64 = (0..10).map(|j| s[i + j * 10]).sum();
            assert!(
                (row_sum - 1.0).abs() < 1e-10,
                "Tricube: row {} should sum to 1, got {}",
                i,
                row_sum
            );
        }
    }

    #[test]
    fn test_cv_smoother_tricube() {
        let x = uniform_grid(30);
        let y: Vec<f64> = x.iter().map(|&xi| 2.0 * xi + 1.0).collect();
        let cv = cv_smoother(&x, &y, 0.2, "tricube");
        assert!(cv.is_finite());
        assert!(cv >= 0.0);
    }

    #[test]
    fn test_optim_bandwidth_tricube() {
        let x = uniform_grid(25);
        let y: Vec<f64> = x
            .iter()
            .map(|&xi| (2.0 * std::f64::consts::PI * xi).sin())
            .collect();

        let result = optim_bandwidth(&x, &y, None, CvCriterion::Gcv, "tricube", 20);
        assert!(result.h_opt > 0.0);
        assert!(result.value.is_finite());
    }
}