// fdars_core — smoothing.rs
1//! Smoothing functions for functional data.
2//!
3//! This module provides kernel-based smoothing methods including
4//! Nadaraya-Watson, local linear, and local polynomial regression.
5
6use crate::slice_maybe_parallel;
7#[cfg(feature = "parallel")]
8use rayon::iter::ParallelIterator;
9
/// Standard Gaussian kernel: exp(-u^2 / 2) / sqrt(2 * pi).
fn gaussian_kernel(u: f64) -> f64 {
    let normalizer = (2.0 * std::f64::consts::PI).sqrt();
    f64::exp(-0.5 * u * u) / normalizer
}
14
/// Epanechnikov kernel: 0.75 * (1 - u^2) on [-1, 1], zero outside.
fn epanechnikov_kernel(u: f64) -> f64 {
    if u.abs() > 1.0 {
        return 0.0;
    }
    0.75 * (1.0 - u * u)
}
23
24/// Get kernel function by name.
25fn get_kernel(kernel_type: &str) -> fn(f64) -> f64 {
26    match kernel_type.to_lowercase().as_str() {
27        "epanechnikov" | "epan" => epanechnikov_kernel,
28        _ => gaussian_kernel,
29    }
30}
31
32/// Nadaraya-Watson kernel smoother.
33///
34/// # Arguments
35/// * `x` - Predictor values
36/// * `y` - Response values
37/// * `x_new` - Points at which to evaluate the smoother
38/// * `bandwidth` - Kernel bandwidth
39/// * `kernel` - Kernel type ("gaussian" or "epanechnikov")
40///
41/// # Returns
42/// Smoothed values at x_new
43pub fn nadaraya_watson(
44    x: &[f64],
45    y: &[f64],
46    x_new: &[f64],
47    bandwidth: f64,
48    kernel: &str,
49) -> Vec<f64> {
50    let n = x.len();
51    if n == 0 || y.len() != n || x_new.is_empty() || bandwidth <= 0.0 {
52        return vec![0.0; x_new.len()];
53    }
54
55    let kernel_fn = get_kernel(kernel);
56
57    slice_maybe_parallel!(x_new)
58        .map(|&x0| {
59            let mut num = 0.0;
60            let mut denom = 0.0;
61
62            for i in 0..n {
63                let u = (x[i] - x0) / bandwidth;
64                let w = kernel_fn(u);
65                num += w * y[i];
66                denom += w;
67            }
68
69            if denom > 1e-10 {
70                num / denom
71            } else {
72                0.0
73            }
74        })
75        .collect()
76}
77
78/// Local linear regression smoother.
79///
80/// # Arguments
81/// * `x` - Predictor values
82/// * `y` - Response values
83/// * `x_new` - Points at which to evaluate the smoother
84/// * `bandwidth` - Kernel bandwidth
85/// * `kernel` - Kernel type
86///
87/// # Returns
88/// Smoothed values at x_new
89pub fn local_linear(x: &[f64], y: &[f64], x_new: &[f64], bandwidth: f64, kernel: &str) -> Vec<f64> {
90    let n = x.len();
91    if n == 0 || y.len() != n || x_new.is_empty() || bandwidth <= 0.0 {
92        return vec![0.0; x_new.len()];
93    }
94
95    let kernel_fn = get_kernel(kernel);
96
97    slice_maybe_parallel!(x_new)
98        .map(|&x0| {
99            // Compute weighted moments
100            let mut s0 = 0.0;
101            let mut s1 = 0.0;
102            let mut s2 = 0.0;
103            let mut t0 = 0.0;
104            let mut t1 = 0.0;
105
106            for i in 0..n {
107                let u = (x[i] - x0) / bandwidth;
108                let w = kernel_fn(u);
109                let d = x[i] - x0;
110
111                s0 += w;
112                s1 += w * d;
113                s2 += w * d * d;
114                t0 += w * y[i];
115                t1 += w * y[i] * d;
116            }
117
118            // Solve local linear regression
119            let det = s0 * s2 - s1 * s1;
120            if det.abs() > 1e-10 {
121                (s2 * t0 - s1 * t1) / det
122            } else if s0 > 1e-10 {
123                t0 / s0
124            } else {
125                0.0
126            }
127        })
128        .collect()
129}
130
/// Accumulate weighted normal equations (X'WX and X'Wy) for a local
/// polynomial fit with `p` coefficients centred at `x0`.
///
/// Returns `(xtx, xty)` where `xtx` is a row-major `p x p` matrix and
/// `xty` a length-`p` vector.
fn accumulate_weighted_normal_equations(
    x: &[f64],
    y: &[f64],
    x0: f64,
    bandwidth: f64,
    p: usize,
    kernel_fn: impl Fn(f64) -> f64,
) -> (Vec<f64>, Vec<f64>) {
    let mut xtx = vec![0.0; p * p];
    let mut xty = vec![0.0; p];
    // Buffer of powers d^0 .. d^(p-1), refilled per observation. Hoisting
    // the powi calls out of the j/k double loop cuts them from O(p^2) to
    // O(p) per observation while producing bit-identical sums.
    let mut d_pows = vec![0.0; p];

    for (&xi, &yi) in x.iter().zip(y.iter()) {
        let d = xi - x0;
        let w = kernel_fn(d / bandwidth);
        for (j, slot) in d_pows.iter_mut().enumerate() {
            *slot = d.powi(j as i32);
        }

        for j in 0..p {
            let w_dj = w * d_pows[j];
            for k in 0..p {
                xtx[j * p + k] += w_dj * d_pows[k];
            }
            xty[j] += w_dj * yi;
        }
    }

    (xtx, xty)
}
160
/// Return the index of the row whose entry in column `col`, at or below
/// the diagonal, has the largest absolute value (partial-pivot row).
/// Ties keep the earliest such row.
fn find_pivot(a: &[f64], p: usize, col: usize) -> usize {
    (col + 1..p).fold(col, |best, row| {
        if a[row * p + col].abs() > a[best * p + col].abs() {
            row
        } else {
            best
        }
    })
}
174
/// Exchange rows `row_a` and `row_b` of the row-major matrix `a`,
/// mirroring the exchange in the right-hand-side vector `b`.
fn swap_rows(a: &mut [f64], b: &mut [f64], p: usize, row_a: usize, row_b: usize) {
    let (base_a, base_b) = (row_a * p, row_b * p);
    for offset in 0..p {
        a.swap(base_a + offset, base_b + offset);
    }
    b.swap(row_a, row_b);
}
182
/// Zero out the entries of column `pivot_row` beneath the diagonal by
/// subtracting a scaled copy of the pivot row from each row below it,
/// applying the same update to the right-hand side `b`.
fn eliminate_below(a: &mut [f64], b: &mut [f64], p: usize, pivot_row: usize) {
    let pivot = a[pivot_row * p + pivot_row];
    for row in pivot_row + 1..p {
        let factor = a[row * p + pivot_row] / pivot;
        for col in pivot_row..p {
            a[row * p + col] -= factor * a[pivot_row * p + col];
        }
        b[row] -= factor * b[pivot_row];
    }
}
194
195fn forward_eliminate(a: &mut [f64], b: &mut [f64], p: usize) {
196    for i in 0..p {
197        let max_idx = find_pivot(a, p, i);
198        if max_idx != i {
199            swap_rows(a, b, p, i, max_idx);
200        }
201
202        if a[i * p + i].abs() < 1e-10 {
203            continue;
204        }
205
206        eliminate_below(a, b, p, i);
207    }
208}
209
/// Back substitution for an upper-triangular system `a * x = b`.
/// Rows whose diagonal entry is (near-)zero leave their coefficient at 0.
fn back_substitute(a: &[f64], b: &[f64], p: usize) -> Vec<f64> {
    let mut coefs = vec![0.0; p];
    for row in (0..p).rev() {
        // Residual after removing the already-solved higher coefficients.
        let residual =
            (row + 1..p).fold(b[row], |acc, col| acc - a[row * p + col] * coefs[col]);
        let diag = a[row * p + row];
        if diag.abs() > 1e-10 {
            coefs[row] = residual / diag;
        }
    }
    coefs
}
224
/// Solve the `p` x `p` linear system `a * x = b` using Gaussian
/// elimination with partial pivoting.
///
/// `a` (row-major) and `b` are overwritten with the eliminated system.
/// Coefficients whose pivot is (near-)singular come back as zero rather
/// than causing a panic.
fn solve_gaussian(a: &mut [f64], b: &mut [f64], p: usize) -> Vec<f64> {
    forward_eliminate(a, b, p);
    back_substitute(a, b, p)
}
229
230/// Local polynomial regression smoother.
231///
232/// # Arguments
233/// * `x` - Predictor values
234/// * `y` - Response values
235/// * `x_new` - Points at which to evaluate the smoother
236/// * `bandwidth` - Kernel bandwidth
237/// * `degree` - Polynomial degree
238/// * `kernel` - Kernel type
239///
240/// # Returns
241/// Smoothed values at x_new
242pub fn local_polynomial(
243    x: &[f64],
244    y: &[f64],
245    x_new: &[f64],
246    bandwidth: f64,
247    degree: usize,
248    kernel: &str,
249) -> Vec<f64> {
250    let n = x.len();
251    if n == 0 || y.len() != n || x_new.is_empty() || bandwidth <= 0.0 || degree == 0 {
252        return vec![0.0; x_new.len()];
253    }
254
255    if degree == 1 {
256        return local_linear(x, y, x_new, bandwidth, kernel);
257    }
258
259    let kernel_fn = get_kernel(kernel);
260    let p = degree + 1; // Number of coefficients
261
262    slice_maybe_parallel!(x_new)
263        .map(|&x0| {
264            let (mut xtx, mut xty) =
265                accumulate_weighted_normal_equations(x, y, x0, bandwidth, p, kernel_fn);
266            let coefs = solve_gaussian(&mut xtx, &mut xty, p);
267            coefs[0]
268        })
269        .collect()
270}
271
272/// k-Nearest Neighbors smoother.
273///
274/// # Arguments
275/// * `x` - Predictor values
276/// * `y` - Response values
277/// * `x_new` - Points at which to evaluate the smoother
278/// * `k` - Number of neighbors
279///
280/// # Returns
281/// Smoothed values at x_new
282pub fn knn_smoother(x: &[f64], y: &[f64], x_new: &[f64], k: usize) -> Vec<f64> {
283    let n = x.len();
284    if n == 0 || y.len() != n || x_new.is_empty() || k == 0 {
285        return vec![0.0; x_new.len()];
286    }
287
288    let k = k.min(n);
289
290    slice_maybe_parallel!(x_new)
291        .map(|&x0| {
292            // Compute distances
293            let mut distances: Vec<(usize, f64)> = x
294                .iter()
295                .enumerate()
296                .map(|(i, &xi)| (i, (xi - x0).abs()))
297                .collect();
298
299            // Partial sort to get k nearest
300            distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
301
302            // Average of k nearest neighbors
303            let sum: f64 = distances.iter().take(k).map(|(i, _)| y[*i]).sum();
304            sum / k as f64
305        })
306        .collect()
307}
308
309/// Compute smoothing matrix for Nadaraya-Watson.
310///
311/// Returns the smoother matrix S such that y_hat = S * y.
312pub fn smoothing_matrix_nw(x: &[f64], bandwidth: f64, kernel: &str) -> Vec<f64> {
313    let n = x.len();
314    if n == 0 || bandwidth <= 0.0 {
315        return Vec::new();
316    }
317
318    let kernel_fn = get_kernel(kernel);
319    let mut s = vec![0.0; n * n];
320
321    for i in 0..n {
322        let mut row_sum = 0.0;
323        for j in 0..n {
324            let u = (x[j] - x[i]) / bandwidth;
325            s[i + j * n] = kernel_fn(u);
326            row_sum += s[i + j * n];
327        }
328        if row_sum > 1e-10 {
329            for j in 0..n {
330                s[i + j * n] /= row_sum;
331            }
332        }
333    }
334
335    s
336}
337
#[cfg(test)]
mod tests {
    use super::*;

    // Evenly spaced grid of n points on [0, 1].
    // Note: n == 1 would divide by zero; callers here always use n >= 3.
    fn uniform_grid(n: usize) -> Vec<f64> {
        (0..n).map(|i| i as f64 / (n - 1) as f64).collect()
    }

    // ============== Nadaraya-Watson tests ==============

    #[test]
    fn test_nw_constant_data() {
        let x = uniform_grid(20);
        let y: Vec<f64> = vec![5.0; 20];

        let y_smooth = nadaraya_watson(&x, &y, &x, 0.1, "gaussian");

        // Smoothing constant data should return constant
        for &yi in &y_smooth {
            assert!(
                (yi - 5.0).abs() < 0.1,
                "Constant data should remain constant"
            );
        }
    }

    #[test]
    fn test_nw_linear_data() {
        let x = uniform_grid(50);
        let y: Vec<f64> = x.iter().map(|&xi| 2.0 * xi + 1.0).collect();

        let y_smooth = nadaraya_watson(&x, &y, &x, 0.2, "gaussian");

        // Linear data should be approximately preserved (with some edge effects)
        // so only the interior points (indices 10..40) are checked.
        for i in 10..40 {
            let expected = 2.0 * x[i] + 1.0;
            assert!(
                (y_smooth[i] - expected).abs() < 0.2,
                "Linear trend should be approximately preserved"
            );
        }
    }

    #[test]
    fn test_nw_gaussian_vs_epanechnikov() {
        let x = uniform_grid(30);
        // One full sine period over [0, 1].
        let y: Vec<f64> = x
            .iter()
            .map(|&xi| (2.0 * std::f64::consts::PI * xi).sin())
            .collect();

        let y_gauss = nadaraya_watson(&x, &y, &x, 0.1, "gaussian");
        let y_epan = nadaraya_watson(&x, &y, &x, 0.1, "epanechnikov");

        // Both should produce valid output
        assert_eq!(y_gauss.len(), 30);
        assert_eq!(y_epan.len(), 30);

        // They should be different (different kernels)
        let diff: f64 = y_gauss
            .iter()
            .zip(&y_epan)
            .map(|(a, b)| (a - b).abs())
            .sum();
        assert!(
            diff > 0.0,
            "Different kernels should give different results"
        );
    }

    #[test]
    fn test_nw_invalid_input() {
        // Empty input
        let result = nadaraya_watson(&[], &[], &[0.5], 0.1, "gaussian");
        assert_eq!(result, vec![0.0]);

        // Mismatched lengths
        let result = nadaraya_watson(&[0.0, 1.0], &[1.0], &[0.5], 0.1, "gaussian");
        assert_eq!(result, vec![0.0]);

        // Zero bandwidth
        let result = nadaraya_watson(&[0.0, 1.0], &[1.0, 2.0], &[0.5], 0.0, "gaussian");
        assert_eq!(result, vec![0.0]);
    }

    // ============== Local linear tests ==============

    #[test]
    fn test_ll_constant_data() {
        let x = uniform_grid(20);
        let y: Vec<f64> = vec![3.0; 20];

        let y_smooth = local_linear(&x, &y, &x, 0.15, "gaussian");

        for &yi in &y_smooth {
            assert!((yi - 3.0).abs() < 0.1, "Constant should remain constant");
        }
    }

    #[test]
    fn test_ll_linear_data_exact() {
        let x = uniform_grid(30);
        let y: Vec<f64> = x.iter().map(|&xi| 3.0 * xi + 2.0).collect();

        let y_smooth = local_linear(&x, &y, &x, 0.2, "gaussian");

        // Local linear should fit linear data exactly (in interior)
        for i in 5..25 {
            let expected = 3.0 * x[i] + 2.0;
            assert!(
                (y_smooth[i] - expected).abs() < 0.1,
                "Local linear should fit linear data well"
            );
        }
    }

    #[test]
    fn test_ll_invalid_input() {
        let result = local_linear(&[], &[], &[0.5], 0.1, "gaussian");
        assert_eq!(result, vec![0.0]);

        // Negative bandwidth is also rejected.
        let result = local_linear(&[0.0, 1.0], &[1.0, 2.0], &[0.5], -0.1, "gaussian");
        assert_eq!(result, vec![0.0]);
    }

    // ============== Local polynomial tests ==============

    #[test]
    fn test_lp_degree1_equals_local_linear() {
        let x = uniform_grid(25);
        let y: Vec<f64> = x.iter().map(|&xi| xi * xi).collect();

        let y_ll = local_linear(&x, &y, &x, 0.15, "gaussian");
        let y_lp = local_polynomial(&x, &y, &x, 0.15, 1, "gaussian");

        // Degree 1 delegates to local_linear, so the outputs must match
        // to floating-point identity.
        for i in 0..25 {
            assert!(
                (y_ll[i] - y_lp[i]).abs() < 1e-10,
                "Degree 1 should equal local linear"
            );
        }
    }

    #[test]
    fn test_lp_quadratic_data() {
        let x = uniform_grid(40);
        let y: Vec<f64> = x.iter().map(|&xi| xi * xi).collect();

        let y_smooth = local_polynomial(&x, &y, &x, 0.15, 2, "gaussian");

        // Local quadratic should fit quadratic data well in interior
        for i in 8..32 {
            let expected = x[i] * x[i];
            assert!(
                (y_smooth[i] - expected).abs() < 0.1,
                "Local quadratic should fit quadratic data"
            );
        }
    }

    #[test]
    fn test_lp_invalid_input() {
        // Zero degree
        let result = local_polynomial(&[0.0, 1.0], &[1.0, 2.0], &[0.5], 0.1, 0, "gaussian");
        assert_eq!(result, vec![0.0]);

        // Empty input
        let result = local_polynomial(&[], &[], &[0.5], 0.1, 2, "gaussian");
        assert_eq!(result, vec![0.0]);
    }

    // ============== KNN smoother tests ==============

    #[test]
    fn test_knn_k1_nearest() {
        let x = vec![0.0, 0.5, 1.0];
        let y = vec![1.0, 2.0, 3.0];

        let result = knn_smoother(&x, &y, &[0.1, 0.6, 0.9], 1);

        // k=1 should return the nearest neighbor's y value
        assert!((result[0] - 1.0).abs() < 1e-10, "0.1 nearest to 0.0 -> 1.0");
        assert!((result[1] - 2.0).abs() < 1e-10, "0.6 nearest to 0.5 -> 2.0");
        assert!((result[2] - 3.0).abs() < 1e-10, "0.9 nearest to 1.0 -> 3.0");
    }

    #[test]
    fn test_knn_k_equals_n_is_mean() {
        let x = vec![0.0, 0.25, 0.5, 0.75, 1.0];
        let y = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let expected_mean = 3.0;

        let result = knn_smoother(&x, &y, &[0.5], 5);

        assert!(
            (result[0] - expected_mean).abs() < 1e-10,
            "k=n should return mean"
        );
    }

    #[test]
    fn test_knn_invalid_input() {
        let result = knn_smoother(&[], &[], &[0.5], 3);
        assert_eq!(result, vec![0.0]);

        // k == 0 is rejected.
        let result = knn_smoother(&[0.0, 1.0], &[1.0, 2.0], &[0.5], 0);
        assert_eq!(result, vec![0.0]);
    }

    // ============== Smoothing matrix tests ==============

    #[test]
    fn test_smoothing_matrix_row_stochastic() {
        let x = uniform_grid(10);
        let s = smoothing_matrix_nw(&x, 0.2, "gaussian");

        assert_eq!(s.len(), 100);

        // Each row should sum to 1 (row stochastic)
        // Entry (i, j) is stored at index i + j * n.
        for i in 0..10 {
            let row_sum: f64 = (0..10).map(|j| s[i + j * 10]).sum();
            assert!(
                (row_sum - 1.0).abs() < 1e-10,
                "Row {} should sum to 1, got {}",
                i,
                row_sum
            );
        }
    }

    #[test]
    fn test_smoothing_matrix_invalid_input() {
        let result = smoothing_matrix_nw(&[], 0.1, "gaussian");
        assert!(result.is_empty());

        // Zero bandwidth is rejected.
        let result = smoothing_matrix_nw(&[0.0, 1.0], 0.0, "gaussian");
        assert!(result.is_empty());
    }

    #[test]
    fn test_nan_nw_no_panic() {
        let x = vec![0.0, 0.25, 0.5, 0.75, 1.0];
        let mut y = vec![0.0, 1.0, 2.0, 1.0, 0.0];
        y[2] = f64::NAN;
        let result = nadaraya_watson(&x, &y, &x, 0.3, "gaussian");
        assert_eq!(result.len(), x.len());
        // NaN should propagate but not panic
    }

    #[test]
    fn test_n1_smoother() {
        // Single data point
        let x = vec![0.5];
        let y = vec![3.0];
        let x_new = vec![0.5];
        let result = nadaraya_watson(&x, &y, &x_new, 0.3, "gaussian");
        assert_eq!(result.len(), 1);
        assert!(
            (result[0] - 3.0).abs() < 1e-6,
            "Single point smoother should return the value"
        );
    }

    #[test]
    fn test_duplicate_x_smoother() {
        // Duplicate x values
        let x = vec![0.0, 0.0, 0.5, 1.0, 1.0];
        let y = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let x_new = vec![0.0, 0.5, 1.0];
        let result = nadaraya_watson(&x, &y, &x_new, 0.3, "gaussian");
        assert_eq!(result.len(), 3);
        for v in &result {
            assert!(v.is_finite());
        }
    }
}