
fdars_core/smoothing.rs

//! Smoothing functions for functional data.
//!
//! This module provides kernel-based smoothing methods including
//! Nadaraya-Watson, local linear, and local polynomial regression.
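//!
//! # Example
//!
//! A minimal usage sketch (the `fdars_core::smoothing` path is assumed from the file
//! location, so the example is marked `ignore` rather than compiled as a doctest):
//!
//! ```ignore
//! use fdars_core::smoothing::nadaraya_watson;
//!
//! let x = vec![0.0, 0.25, 0.5, 0.75, 1.0];
//! let y = vec![0.0, 1.0, 0.0, -1.0, 0.0];
//! // Smooth the observations on their own grid with bandwidth 0.2.
//! let y_smooth = nadaraya_watson(&x, &y, &x, 0.2, "gaussian");
//! assert_eq!(y_smooth.len(), x.len());
//! ```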

use crate::slice_maybe_parallel;
#[cfg(feature = "parallel")]
use rayon::iter::ParallelIterator;

/// Gaussian kernel function.
fn gaussian_kernel(u: f64) -> f64 {
    (-0.5 * u * u).exp() / (2.0 * std::f64::consts::PI).sqrt()
}

/// Epanechnikov kernel function.
fn epanechnikov_kernel(u: f64) -> f64 {
    if u.abs() <= 1.0 {
        0.75 * (1.0 - u * u)
    } else {
        0.0
    }
}

/// Tri-cube kernel function (used by R's loess()).
fn tricube_kernel(u: f64) -> f64 {
    let abs_u = u.abs();
    if abs_u < 1.0 {
        (1.0 - abs_u.powi(3)).powi(3)
    } else {
        0.0
    }
}

/// Get the kernel function by name; unrecognized names fall back to the Gaussian kernel.
fn get_kernel(kernel_type: &str) -> fn(f64) -> f64 {
    match kernel_type.to_lowercase().as_str() {
        "epanechnikov" | "epan" => epanechnikov_kernel,
        "tricube" | "tri-cube" => tricube_kernel,
        _ => gaussian_kernel,
    }
}

/// Nadaraya-Watson kernel smoother.
///
/// # Arguments
/// * `x` - Predictor values
/// * `y` - Response values
/// * `x_new` - Points at which to evaluate the smoother
/// * `bandwidth` - Kernel bandwidth
/// * `kernel` - Kernel type ("gaussian", "epanechnikov", or "tricube")
///
/// # Returns
/// Smoothed values at x_new
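///
/// The estimate at each evaluation point `x0` is the kernel-weighted average
/// `sum_i K((x_i - x0)/h) * y_i / sum_i K((x_i - x0)/h)` with `h = bandwidth`;
/// if the weights are all numerically zero, 0.0 is returned for that point.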
pub fn nadaraya_watson(
    x: &[f64],
    y: &[f64],
    x_new: &[f64],
    bandwidth: f64,
    kernel: &str,
) -> Vec<f64> {
    let n = x.len();
    if n == 0 || y.len() != n || x_new.is_empty() || bandwidth <= 0.0 {
        return vec![0.0; x_new.len()];
    }

    let kernel_fn = get_kernel(kernel);

    slice_maybe_parallel!(x_new)
        .map(|&x0| {
            let mut num = 0.0;
            let mut denom = 0.0;

            for i in 0..n {
                let u = (x[i] - x0) / bandwidth;
                let w = kernel_fn(u);
                num += w * y[i];
                denom += w;
            }

            if denom > 1e-10 {
                num / denom
            } else {
                0.0
            }
        })
        .collect()
}

/// Local linear regression smoother.
///
/// # Arguments
/// * `x` - Predictor values
/// * `y` - Response values
/// * `x_new` - Points at which to evaluate the smoother
/// * `bandwidth` - Kernel bandwidth
/// * `kernel` - Kernel type
///
/// # Returns
/// Smoothed values at x_new
pub fn local_linear(x: &[f64], y: &[f64], x_new: &[f64], bandwidth: f64, kernel: &str) -> Vec<f64> {
    let n = x.len();
    if n == 0 || y.len() != n || x_new.is_empty() || bandwidth <= 0.0 {
        return vec![0.0; x_new.len()];
    }

    let kernel_fn = get_kernel(kernel);

    slice_maybe_parallel!(x_new)
        .map(|&x0| {
            // Compute weighted moments
            let mut s0 = 0.0;
            let mut s1 = 0.0;
            let mut s2 = 0.0;
            let mut t0 = 0.0;
            let mut t1 = 0.0;

            for i in 0..n {
                let u = (x[i] - x0) / bandwidth;
                let w = kernel_fn(u);
                let d = x[i] - x0;

                s0 += w;
                s1 += w * d;
                s2 += w * d * d;
                t0 += w * y[i];
                t1 += w * y[i] * d;
            }

            // Solve local linear regression
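            // At x0 the fit minimizes sum_i w_i * (y_i - a - b*(x_i - x0))^2, whose
            // normal equations are [s0 s1; s1 s2] * [a, b]^T = [t0, t1]^T; Cramer's
            // rule gives the intercept a, which is the smoothed value at x0.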
            let det = s0 * s2 - s1 * s1;
            if det.abs() > 1e-10 {
                (s2 * t0 - s1 * t1) / det
            } else if s0 > 1e-10 {
                t0 / s0
            } else {
                0.0
            }
        })
        .collect()
}

/// Accumulate weighted normal equations (X'WX and X'Wy) for local polynomial fit.
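///
/// Column `j` of the local design matrix holds `(x_i - x0)^j` for `j = 0..p-1`, and the
/// kernel weights `K((x_i - x0)/bandwidth)` form the diagonal of `W`; `xtx` (`p * p`,
/// row-major) and `xty` (length `p`) are the accumulated `X'WX` and `X'Wy`.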
fn accumulate_weighted_normal_equations(
    x: &[f64],
    y: &[f64],
    x0: f64,
    bandwidth: f64,
    p: usize,
    kernel_fn: impl Fn(f64) -> f64,
) -> (Vec<f64>, Vec<f64>) {
    let n = x.len();
    let mut xtx = vec![0.0; p * p];
    let mut xty = vec![0.0; p];

    for i in 0..n {
        let u = (x[i] - x0) / bandwidth;
        let w = kernel_fn(u);
        let d = x[i] - x0;

        for j in 0..p {
            let w_dj = w * d.powi(j as i32);
            for k in 0..p {
                xtx[j * p + k] += w_dj * d.powi(k as i32);
            }
            xty[j] += w_dj * y[i];
        }
    }

    (xtx, xty)
}

/// Find the row with the largest absolute value in column `col` at or below the diagonal.
fn find_pivot(a: &[f64], p: usize, col: usize) -> usize {
    let mut max_idx = col;
    for j in (col + 1)..p {
        if a[j * p + col].abs() > a[max_idx * p + col].abs() {
            max_idx = j;
        }
    }
    max_idx
}

/// Swap two rows in both the matrix `a` and the RHS vector `b`.
fn swap_rows(a: &mut [f64], b: &mut [f64], p: usize, row_a: usize, row_b: usize) {
    for k in 0..p {
        a.swap(row_a * p + k, row_b * p + k);
    }
    b.swap(row_a, row_b);
}

/// Subtract a scaled copy of the pivot row from all rows below it.
fn eliminate_below(a: &mut [f64], b: &mut [f64], p: usize, pivot_row: usize) {
    let pivot = a[pivot_row * p + pivot_row];
    for j in (pivot_row + 1)..p {
        let factor = a[j * p + pivot_row] / pivot;
        for k in pivot_row..p {
            a[j * p + k] -= factor * a[pivot_row * p + k];
        }
        b[j] -= factor * b[pivot_row];
    }
}

/// Gaussian elimination with partial pivoting (forward pass).
fn forward_eliminate(a: &mut [f64], b: &mut [f64], p: usize) {
    for i in 0..p {
        let max_idx = find_pivot(a, p, i);
        if max_idx != i {
            swap_rows(a, b, p, i, max_idx);
        }

        if a[i * p + i].abs() < 1e-10 {
            continue;
        }

        eliminate_below(a, b, p, i);
    }
}

/// Back substitution for an upper-triangular system.
fn back_substitute(a: &[f64], b: &[f64], p: usize) -> Vec<f64> {
    let mut coefs = vec![0.0; p];
    for i in (0..p).rev() {
        let mut sum = b[i];
        for j in (i + 1)..p {
            sum -= a[i * p + j] * coefs[j];
        }
        if a[i * p + i].abs() > 1e-10 {
            coefs[i] = sum / a[i * p + i];
        }
    }
    coefs
}

/// Solve a linear system using Gaussian elimination with partial pivoting.
/// Returns the solution vector; coefficients whose pivots are (near-)zero are left
/// at 0.0, so a singular system yields a zero or partial solution rather than a panic.
fn solve_gaussian(a: &mut [f64], b: &mut [f64], p: usize) -> Vec<f64> {
    forward_eliminate(a, b, p);
    back_substitute(a, b, p)
}

/// Local polynomial regression smoother.
///
/// # Arguments
/// * `x` - Predictor values
/// * `y` - Response values
/// * `x_new` - Points at which to evaluate the smoother
/// * `bandwidth` - Kernel bandwidth
/// * `degree` - Polynomial degree (degree 0 yields a zero vector; degree 1 dispatches to `local_linear`)
/// * `kernel` - Kernel type
///
/// # Returns
/// Smoothed values at x_new
pub fn local_polynomial(
    x: &[f64],
    y: &[f64],
    x_new: &[f64],
    bandwidth: f64,
    degree: usize,
    kernel: &str,
) -> Vec<f64> {
    let n = x.len();
    if n == 0 || y.len() != n || x_new.is_empty() || bandwidth <= 0.0 || degree == 0 {
        return vec![0.0; x_new.len()];
    }

    if degree == 1 {
        return local_linear(x, y, x_new, bandwidth, kernel);
    }

    let kernel_fn = get_kernel(kernel);
    let p = degree + 1; // Number of coefficients

    slice_maybe_parallel!(x_new)
        .map(|&x0| {
            let (mut xtx, mut xty) =
                accumulate_weighted_normal_equations(x, y, x0, bandwidth, p, kernel_fn);
            let coefs = solve_gaussian(&mut xtx, &mut xty, p);
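            // The polynomial basis is centered at x0 (d = x_i - x0), so the fitted
            // intercept is the smoothed value at x0.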
            coefs[0]
        })
        .collect()
}

/// k-Nearest Neighbors smoother.
///
/// # Arguments
/// * `x` - Predictor values
/// * `y` - Response values
/// * `x_new` - Points at which to evaluate the smoother
/// * `k` - Number of neighbors
///
/// # Returns
/// Smoothed values at x_new
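///
/// The prediction at each evaluation point is the unweighted mean of the responses of
/// the `k` nearest observations; `k` is capped at the number of observations.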
pub fn knn_smoother(x: &[f64], y: &[f64], x_new: &[f64], k: usize) -> Vec<f64> {
    let n = x.len();
    if n == 0 || y.len() != n || x_new.is_empty() || k == 0 {
        return vec![0.0; x_new.len()];
    }

    let k = k.min(n);

    slice_maybe_parallel!(x_new)
        .map(|&x0| {
            // Compute distances
            let mut distances: Vec<(usize, f64)> = x
                .iter()
                .enumerate()
                .map(|(i, &xi)| (i, (xi - x0).abs()))
                .collect();

            // Sort by distance so the first k entries are the k nearest neighbors
            distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));

            // Average of k nearest neighbors
            let sum: f64 = distances.iter().take(k).map(|(i, _)| y[*i]).sum();
            sum / k as f64
        })
        .collect()
}

/// Compute smoothing matrix for Nadaraya-Watson.
///
/// Returns the smoother matrix S such that y_hat = S * y.
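///
/// The matrix is stored in column-major order: entry `S[i, j]`, the normalized weight
/// that observation `j` receives when predicting at `x[i]`, lives at `s[i + j * n]`,
/// and each row sums to 1 whenever its kernel weights are not all numerically zero.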
pub fn smoothing_matrix_nw(x: &[f64], bandwidth: f64, kernel: &str) -> Vec<f64> {
    let n = x.len();
    if n == 0 || bandwidth <= 0.0 {
        return Vec::new();
    }

    let kernel_fn = get_kernel(kernel);
    let mut s = vec![0.0; n * n];

    for i in 0..n {
        let mut row_sum = 0.0;
        for j in 0..n {
            let u = (x[j] - x[i]) / bandwidth;
            s[i + j * n] = kernel_fn(u);
            row_sum += s[i + j * n];
        }
        if row_sum > 1e-10 {
            for j in 0..n {
                s[i + j * n] /= row_sum;
            }
        }
    }

    s
}

#[cfg(test)]
mod tests {
    use super::*;

    fn uniform_grid(n: usize) -> Vec<f64> {
        (0..n).map(|i| i as f64 / (n - 1) as f64).collect()
    }

    // ============== Nadaraya-Watson tests ==============

    #[test]
    fn test_nw_constant_data() {
        let x = uniform_grid(20);
        let y: Vec<f64> = vec![5.0; 20];

        let y_smooth = nadaraya_watson(&x, &y, &x, 0.1, "gaussian");

        // Smoothing constant data should return constant
        for &yi in &y_smooth {
            assert!(
                (yi - 5.0).abs() < 0.1,
                "Constant data should remain constant"
            );
        }
    }

    #[test]
    fn test_nw_linear_data() {
        let x = uniform_grid(50);
        let y: Vec<f64> = x.iter().map(|&xi| 2.0 * xi + 1.0).collect();

        let y_smooth = nadaraya_watson(&x, &y, &x, 0.2, "gaussian");

        // Linear data should be approximately preserved (with some edge effects)
        for i in 10..40 {
            let expected = 2.0 * x[i] + 1.0;
            assert!(
                (y_smooth[i] - expected).abs() < 0.2,
                "Linear trend should be approximately preserved"
            );
        }
    }

    #[test]
    fn test_nw_gaussian_vs_epanechnikov() {
        let x = uniform_grid(30);
        let y: Vec<f64> = x
            .iter()
            .map(|&xi| (2.0 * std::f64::consts::PI * xi).sin())
            .collect();

        let y_gauss = nadaraya_watson(&x, &y, &x, 0.1, "gaussian");
        let y_epan = nadaraya_watson(&x, &y, &x, 0.1, "epanechnikov");

        // Both should produce valid output
        assert_eq!(y_gauss.len(), 30);
        assert_eq!(y_epan.len(), 30);

        // They should be different (different kernels)
        let diff: f64 = y_gauss
            .iter()
            .zip(&y_epan)
            .map(|(a, b)| (a - b).abs())
            .sum();
        assert!(
            diff > 0.0,
            "Different kernels should give different results"
        );
    }

    #[test]
    fn test_nw_invalid_input() {
        // Empty input
        let result = nadaraya_watson(&[], &[], &[0.5], 0.1, "gaussian");
        assert_eq!(result, vec![0.0]);

        // Mismatched lengths
        let result = nadaraya_watson(&[0.0, 1.0], &[1.0], &[0.5], 0.1, "gaussian");
        assert_eq!(result, vec![0.0]);

        // Zero bandwidth
        let result = nadaraya_watson(&[0.0, 1.0], &[1.0, 2.0], &[0.5], 0.0, "gaussian");
        assert_eq!(result, vec![0.0]);
    }

    // ============== Local linear tests ==============

    #[test]
    fn test_ll_constant_data() {
        let x = uniform_grid(20);
        let y: Vec<f64> = vec![3.0; 20];

        let y_smooth = local_linear(&x, &y, &x, 0.15, "gaussian");

        for &yi in &y_smooth {
            assert!((yi - 3.0).abs() < 0.1, "Constant should remain constant");
        }
    }

    #[test]
    fn test_ll_linear_data_exact() {
        let x = uniform_grid(30);
        let y: Vec<f64> = x.iter().map(|&xi| 3.0 * xi + 2.0).collect();

        let y_smooth = local_linear(&x, &y, &x, 0.2, "gaussian");

        // Local linear should fit linear data exactly (in interior)
        for i in 5..25 {
            let expected = 3.0 * x[i] + 2.0;
            assert!(
                (y_smooth[i] - expected).abs() < 0.1,
                "Local linear should fit linear data well"
            );
        }
    }

    #[test]
    fn test_ll_invalid_input() {
        let result = local_linear(&[], &[], &[0.5], 0.1, "gaussian");
        assert_eq!(result, vec![0.0]);

        let result = local_linear(&[0.0, 1.0], &[1.0, 2.0], &[0.5], -0.1, "gaussian");
        assert_eq!(result, vec![0.0]);
    }

    // ============== Local polynomial tests ==============

    #[test]
    fn test_lp_degree1_equals_local_linear() {
        let x = uniform_grid(25);
        let y: Vec<f64> = x.iter().map(|&xi| xi * xi).collect();

        let y_ll = local_linear(&x, &y, &x, 0.15, "gaussian");
        let y_lp = local_polynomial(&x, &y, &x, 0.15, 1, "gaussian");

        for i in 0..25 {
            assert!(
                (y_ll[i] - y_lp[i]).abs() < 1e-10,
                "Degree 1 should equal local linear"
            );
        }
    }

    #[test]
    fn test_lp_quadratic_data() {
        let x = uniform_grid(40);
        let y: Vec<f64> = x.iter().map(|&xi| xi * xi).collect();

        let y_smooth = local_polynomial(&x, &y, &x, 0.15, 2, "gaussian");

        // Local quadratic should fit quadratic data well in interior
        for i in 8..32 {
            let expected = x[i] * x[i];
            assert!(
                (y_smooth[i] - expected).abs() < 0.1,
                "Local quadratic should fit quadratic data"
            );
        }
    }

    #[test]
    fn test_lp_invalid_input() {
        // Zero degree
        let result = local_polynomial(&[0.0, 1.0], &[1.0, 2.0], &[0.5], 0.1, 0, "gaussian");
        assert_eq!(result, vec![0.0]);

        // Empty input
        let result = local_polynomial(&[], &[], &[0.5], 0.1, 2, "gaussian");
        assert_eq!(result, vec![0.0]);
    }
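
    // ============== Linear solver test ==============

    #[test]
    fn test_solve_gaussian_known_system() {
        // Sanity check of the internal Gaussian elimination on a small system with a
        // known solution: [[2, 1], [1, 3]] * x = [3, 5] has x = (0.8, 1.4).
        let mut a = vec![2.0, 1.0, 1.0, 3.0];
        let mut b = vec![3.0, 5.0];
        let sol = solve_gaussian(&mut a, &mut b, 2);
        assert!((sol[0] - 0.8).abs() < 1e-12, "x[0] should be 0.8");
        assert!((sol[1] - 1.4).abs() < 1e-12, "x[1] should be 1.4");
    }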

    // ============== KNN smoother tests ==============

    #[test]
    fn test_knn_k1_nearest() {
        let x = vec![0.0, 0.5, 1.0];
        let y = vec![1.0, 2.0, 3.0];

        let result = knn_smoother(&x, &y, &[0.1, 0.6, 0.9], 1);

        // k=1 should return the nearest neighbor's y value
        assert!((result[0] - 1.0).abs() < 1e-10, "0.1 nearest to 0.0 -> 1.0");
        assert!((result[1] - 2.0).abs() < 1e-10, "0.6 nearest to 0.5 -> 2.0");
        assert!((result[2] - 3.0).abs() < 1e-10, "0.9 nearest to 1.0 -> 3.0");
    }

    #[test]
    fn test_knn_k_equals_n_is_mean() {
        let x = vec![0.0, 0.25, 0.5, 0.75, 1.0];
        let y = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let expected_mean = 3.0;

        let result = knn_smoother(&x, &y, &[0.5], 5);

        assert!(
            (result[0] - expected_mean).abs() < 1e-10,
            "k=n should return mean"
        );
    }

    #[test]
    fn test_knn_invalid_input() {
        let result = knn_smoother(&[], &[], &[0.5], 3);
        assert_eq!(result, vec![0.0]);

        let result = knn_smoother(&[0.0, 1.0], &[1.0, 2.0], &[0.5], 0);
        assert_eq!(result, vec![0.0]);
    }

    // ============== Smoothing matrix tests ==============

    #[test]
    fn test_smoothing_matrix_row_stochastic() {
        let x = uniform_grid(10);
        let s = smoothing_matrix_nw(&x, 0.2, "gaussian");

        assert_eq!(s.len(), 100);

        // Each row should sum to 1 (row stochastic)
        for i in 0..10 {
            let row_sum: f64 = (0..10).map(|j| s[i + j * 10]).sum();
            assert!(
                (row_sum - 1.0).abs() < 1e-10,
                "Row {} should sum to 1, got {}",
                i,
                row_sum
            );
        }
    }

    #[test]
    fn test_smoothing_matrix_invalid_input() {
        let result = smoothing_matrix_nw(&[], 0.1, "gaussian");
        assert!(result.is_empty());

        let result = smoothing_matrix_nw(&[0.0, 1.0], 0.0, "gaussian");
        assert!(result.is_empty());
    }
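
    #[test]
    fn test_smoothing_matrix_reproduces_nw_fit() {
        // Applying the smoother matrix to y should reproduce the direct
        // Nadaraya-Watson fit at the observation points (up to rounding),
        // since y_hat = S * y by construction.
        let x = uniform_grid(15);
        let y: Vec<f64> = x.iter().map(|&xi| xi * xi).collect();

        let s = smoothing_matrix_nw(&x, 0.2, "gaussian");
        let y_direct = nadaraya_watson(&x, &y, &x, 0.2, "gaussian");

        for i in 0..15 {
            let y_hat: f64 = (0..15).map(|j| s[i + j * 15] * y[j]).sum();
            assert!(
                (y_hat - y_direct[i]).abs() < 1e-10,
                "S * y should match the direct Nadaraya-Watson fit"
            );
        }
    }

    // ============== Edge case tests ==============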

    #[test]
    fn test_nan_nw_no_panic() {
        let x = vec![0.0, 0.25, 0.5, 0.75, 1.0];
        let mut y = vec![0.0, 1.0, 2.0, 1.0, 0.0];
        y[2] = f64::NAN;
        let result = nadaraya_watson(&x, &y, &x, 0.3, "gaussian");
        assert_eq!(result.len(), x.len());
        // NaN should propagate but not panic
    }

    #[test]
    fn test_n1_smoother() {
        // Single data point
        let x = vec![0.5];
        let y = vec![3.0];
        let x_new = vec![0.5];
        let result = nadaraya_watson(&x, &y, &x_new, 0.3, "gaussian");
        assert_eq!(result.len(), 1);
        assert!(
            (result[0] - 3.0).abs() < 1e-6,
            "Single point smoother should return the value"
        );
    }

    #[test]
    fn test_duplicate_x_smoother() {
        // Duplicate x values
        let x = vec![0.0, 0.0, 0.5, 1.0, 1.0];
        let y = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let x_new = vec![0.0, 0.5, 1.0];
        let result = nadaraya_watson(&x, &y, &x_new, 0.3, "gaussian");
        assert_eq!(result.len(), 3);
        for v in &result {
            assert!(v.is_finite());
        }
    }
}