// fdars_core/smoothing.rs
1//! Smoothing functions for functional data.
2//!
3//! This module provides kernel-based smoothing methods including
4//! Nadaraya-Watson, local linear, and local polynomial regression.
5
6use crate::slice_maybe_parallel;
7#[cfg(feature = "parallel")]
8use rayon::iter::ParallelIterator;
9
/// Gaussian kernel: the standard normal density, exp(-u²/2) / √(2π).
fn gaussian_kernel(u: f64) -> f64 {
    let norm = (2.0 * std::f64::consts::PI).sqrt();
    (-0.5 * u * u).exp() / norm
}
14
/// Epanechnikov kernel: 0.75·(1 − u²) on [−1, 1], zero outside that support.
fn epanechnikov_kernel(u: f64) -> f64 {
    if u.abs() > 1.0 {
        return 0.0;
    }
    0.75 * (1.0 - u * u)
}
23
24/// Get kernel function by name.
25fn get_kernel(kernel_type: &str) -> fn(f64) -> f64 {
26    match kernel_type.to_lowercase().as_str() {
27        "epanechnikov" | "epan" => epanechnikov_kernel,
28        _ => gaussian_kernel,
29    }
30}
31
32/// Nadaraya-Watson kernel smoother.
33///
34/// # Arguments
35/// * `x` - Predictor values
36/// * `y` - Response values
37/// * `x_new` - Points at which to evaluate the smoother
38/// * `bandwidth` - Kernel bandwidth
39/// * `kernel` - Kernel type ("gaussian" or "epanechnikov")
40///
41/// # Returns
42/// Smoothed values at x_new
43pub fn nadaraya_watson(
44    x: &[f64],
45    y: &[f64],
46    x_new: &[f64],
47    bandwidth: f64,
48    kernel: &str,
49) -> Vec<f64> {
50    let n = x.len();
51    if n == 0 || y.len() != n || x_new.is_empty() || bandwidth <= 0.0 {
52        return vec![0.0; x_new.len()];
53    }
54
55    let kernel_fn = get_kernel(kernel);
56
57    slice_maybe_parallel!(x_new)
58        .map(|&x0| {
59            let mut num = 0.0;
60            let mut denom = 0.0;
61
62            for i in 0..n {
63                let u = (x[i] - x0) / bandwidth;
64                let w = kernel_fn(u);
65                num += w * y[i];
66                denom += w;
67            }
68
69            if denom > 1e-10 {
70                num / denom
71            } else {
72                0.0
73            }
74        })
75        .collect()
76}
77
78/// Local linear regression smoother.
79///
80/// # Arguments
81/// * `x` - Predictor values
82/// * `y` - Response values
83/// * `x_new` - Points at which to evaluate the smoother
84/// * `bandwidth` - Kernel bandwidth
85/// * `kernel` - Kernel type
86///
87/// # Returns
88/// Smoothed values at x_new
89pub fn local_linear(x: &[f64], y: &[f64], x_new: &[f64], bandwidth: f64, kernel: &str) -> Vec<f64> {
90    let n = x.len();
91    if n == 0 || y.len() != n || x_new.is_empty() || bandwidth <= 0.0 {
92        return vec![0.0; x_new.len()];
93    }
94
95    let kernel_fn = get_kernel(kernel);
96
97    slice_maybe_parallel!(x_new)
98        .map(|&x0| {
99            // Compute weighted moments
100            let mut s0 = 0.0;
101            let mut s1 = 0.0;
102            let mut s2 = 0.0;
103            let mut t0 = 0.0;
104            let mut t1 = 0.0;
105
106            for i in 0..n {
107                let u = (x[i] - x0) / bandwidth;
108                let w = kernel_fn(u);
109                let d = x[i] - x0;
110
111                s0 += w;
112                s1 += w * d;
113                s2 += w * d * d;
114                t0 += w * y[i];
115                t1 += w * y[i] * d;
116            }
117
118            // Solve local linear regression
119            let det = s0 * s2 - s1 * s1;
120            if det.abs() > 1e-10 {
121                (s2 * t0 - s1 * t1) / det
122            } else if s0 > 1e-10 {
123                t0 / s0
124            } else {
125                0.0
126            }
127        })
128        .collect()
129}
130
/// Accumulate the weighted normal equations (XᵀWX and XᵀWy) for a local
/// polynomial fit of order `p - 1` centered at `x0`.
///
/// The design row for sample `i` is `[1, d, d², …, d^(p-1)]` with
/// `d = x[i] − x0`, and its weight is `kernel_fn((x[i] − x0) / bandwidth)`.
///
/// # Returns
/// `(xtx, xty)` where `xtx` is the row-major `p × p` matrix XᵀWX and `xty`
/// is the length-`p` vector XᵀWy.
fn accumulate_weighted_normal_equations(
    x: &[f64],
    y: &[f64],
    x0: f64,
    bandwidth: f64,
    p: usize,
    kernel_fn: impl Fn(f64) -> f64,
) -> (Vec<f64>, Vec<f64>) {
    let mut xtx = vec![0.0; p * p];
    let mut xty = vec![0.0; p];
    // Power buffer is hoisted out of the loop and reused, instead of being
    // heap-allocated once per data point as before.
    let mut powers = vec![1.0; p];

    for (&xi, &yi) in x.iter().zip(y) {
        let d = xi - x0;
        let w = kernel_fn(d / bandwidth);

        // powers[j] = d^j
        powers[0] = 1.0;
        for j in 1..p {
            powers[j] = powers[j - 1] * d;
        }

        for j in 0..p {
            for k in 0..p {
                xtx[j * p + k] += w * powers[j] * powers[k];
            }
            xty[j] += w * powers[j] * yi;
        }
    }

    (xtx, xty)
}
164
/// Find the pivot row for Gaussian elimination with partial pivoting: the
/// index of the row at or below the diagonal whose entry in column `col` of
/// the row-major `p × p` matrix `a` has the largest absolute value.
/// (The previous doc comment here described `solve_gaussian` and was
/// misattached to this helper.)
fn find_pivot(a: &[f64], p: usize, col: usize) -> usize {
    let mut max_idx = col;
    for j in (col + 1)..p {
        // Strict `>` keeps the first row on ties, matching the original.
        if a[j * p + col].abs() > a[max_idx * p + col].abs() {
            max_idx = j;
        }
    }
    max_idx
}
178
/// Exchange rows `row_a` and `row_b` of the row-major matrix `a`, mirroring
/// the swap in the right-hand-side vector `b`.
fn swap_rows(a: &mut [f64], b: &mut [f64], p: usize, row_a: usize, row_b: usize) {
    let (offset_a, offset_b) = (row_a * p, row_b * p);
    (0..p).for_each(|k| a.swap(offset_a + k, offset_b + k));
    b.swap(row_a, row_b);
}
186
/// Zero out column `pivot_row` below the diagonal by subtracting a scaled
/// copy of the pivot row from every row beneath it (and from `b` likewise).
fn eliminate_below(a: &mut [f64], b: &mut [f64], p: usize, pivot_row: usize) {
    let pivot = a[pivot_row * p + pivot_row];
    for row in (pivot_row + 1)..p {
        let factor = a[row * p + pivot_row] / pivot;
        // Columns left of the pivot are already zero; start at pivot_row.
        for col in pivot_row..p {
            a[row * p + col] -= factor * a[pivot_row * p + col];
        }
        b[row] -= factor * b[pivot_row];
    }
}
198
199fn forward_eliminate(a: &mut [f64], b: &mut [f64], p: usize) {
200    for i in 0..p {
201        let max_idx = find_pivot(a, p, i);
202        if max_idx != i {
203            swap_rows(a, b, p, i, max_idx);
204        }
205
206        if a[i * p + i].abs() < 1e-10 {
207            continue;
208        }
209
210        eliminate_below(a, b, p, i);
211    }
212}
213
/// Solve an upper-triangular system `a · x = b` by back substitution.
/// Rows with a (near-)zero diagonal entry leave their coefficient at zero.
fn back_substitute(a: &[f64], b: &[f64], p: usize) -> Vec<f64> {
    let mut coefs = vec![0.0; p];
    for row in (0..p).rev() {
        // Remove the contribution of the coefficients already solved.
        let mut rhs = b[row];
        for col in (row + 1)..p {
            rhs -= a[row * p + col] * coefs[col];
        }
        let diag = a[row * p + row];
        if diag.abs() > 1e-10 {
            coefs[row] = rhs / diag;
        }
    }
    coefs
}
228
/// Solve the `p × p` linear system `a · x = b` using Gaussian elimination
/// with partial pivoting. Both `a` (row-major) and `b` are overwritten in
/// place; coefficients tied to (near-)singular pivots come back as zero.
fn solve_gaussian(a: &mut [f64], b: &mut [f64], p: usize) -> Vec<f64> {
    forward_eliminate(a, b, p);
    back_substitute(a, b, p)
}
233
234/// Local polynomial regression smoother.
235///
236/// # Arguments
237/// * `x` - Predictor values
238/// * `y` - Response values
239/// * `x_new` - Points at which to evaluate the smoother
240/// * `bandwidth` - Kernel bandwidth
241/// * `degree` - Polynomial degree
242/// * `kernel` - Kernel type
243///
244/// # Returns
245/// Smoothed values at x_new
246pub fn local_polynomial(
247    x: &[f64],
248    y: &[f64],
249    x_new: &[f64],
250    bandwidth: f64,
251    degree: usize,
252    kernel: &str,
253) -> Vec<f64> {
254    let n = x.len();
255    if n == 0 || y.len() != n || x_new.is_empty() || bandwidth <= 0.0 || degree == 0 {
256        return vec![0.0; x_new.len()];
257    }
258
259    if degree == 1 {
260        return local_linear(x, y, x_new, bandwidth, kernel);
261    }
262
263    let kernel_fn = get_kernel(kernel);
264    let p = degree + 1; // Number of coefficients
265
266    slice_maybe_parallel!(x_new)
267        .map(|&x0| {
268            let (mut xtx, mut xty) =
269                accumulate_weighted_normal_equations(x, y, x0, bandwidth, p, kernel_fn);
270            let coefs = solve_gaussian(&mut xtx, &mut xty, p);
271            coefs[0]
272        })
273        .collect()
274}
275
276/// k-Nearest Neighbors smoother.
277///
278/// # Arguments
279/// * `x` - Predictor values
280/// * `y` - Response values
281/// * `x_new` - Points at which to evaluate the smoother
282/// * `k` - Number of neighbors
283///
284/// # Returns
285/// Smoothed values at x_new
286pub fn knn_smoother(x: &[f64], y: &[f64], x_new: &[f64], k: usize) -> Vec<f64> {
287    let n = x.len();
288    if n == 0 || y.len() != n || x_new.is_empty() || k == 0 {
289        return vec![0.0; x_new.len()];
290    }
291
292    let k = k.min(n);
293
294    slice_maybe_parallel!(x_new)
295        .map(|&x0| {
296            // Compute distances
297            let mut distances: Vec<(usize, f64)> = x
298                .iter()
299                .enumerate()
300                .map(|(i, &xi)| (i, (xi - x0).abs()))
301                .collect();
302
303            // Partial sort to get k nearest
304            distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
305
306            // Average of k nearest neighbors
307            let sum: f64 = distances.iter().take(k).map(|(i, _)| y[*i]).sum();
308            sum / k as f64
309        })
310        .collect()
311}
312
313/// Compute smoothing matrix for Nadaraya-Watson.
314///
315/// Returns the smoother matrix S such that y_hat = S * y.
316pub fn smoothing_matrix_nw(x: &[f64], bandwidth: f64, kernel: &str) -> Vec<f64> {
317    let n = x.len();
318    if n == 0 || bandwidth <= 0.0 {
319        return Vec::new();
320    }
321
322    let kernel_fn = get_kernel(kernel);
323    let mut s = vec![0.0; n * n];
324
325    for i in 0..n {
326        let mut row_sum = 0.0;
327        for j in 0..n {
328            let u = (x[j] - x[i]) / bandwidth;
329            s[i + j * n] = kernel_fn(u);
330            row_sum += s[i + j * n];
331        }
332        if row_sum > 1e-10 {
333            for j in 0..n {
334                s[i + j * n] /= row_sum;
335            }
336        }
337    }
338
339    s
340}
341
#[cfg(test)]
mod tests {
    use super::*;

    /// Evenly spaced grid of `n` points on [0, 1].
    /// (Assumes n >= 2; with n == 1 the divisor is zero and entries are NaN.)
    fn uniform_grid(n: usize) -> Vec<f64> {
        (0..n).map(|i| i as f64 / (n - 1) as f64).collect()
    }

    // ============== Nadaraya-Watson tests ==============

    #[test]
    fn test_nw_constant_data() {
        let x = uniform_grid(20);
        let y: Vec<f64> = vec![5.0; 20];

        let y_smooth = nadaraya_watson(&x, &y, &x, 0.1, "gaussian");

        // Smoothing constant data should return constant
        for &yi in &y_smooth {
            assert!(
                (yi - 5.0).abs() < 0.1,
                "Constant data should remain constant"
            );
        }
    }

    #[test]
    fn test_nw_linear_data() {
        let x = uniform_grid(50);
        let y: Vec<f64> = x.iter().map(|&xi| 2.0 * xi + 1.0).collect();

        let y_smooth = nadaraya_watson(&x, &y, &x, 0.2, "gaussian");

        // Linear data should be approximately preserved (with some edge effects)
        // Interior points only: NW has boundary bias at the grid edges.
        for i in 10..40 {
            let expected = 2.0 * x[i] + 1.0;
            assert!(
                (y_smooth[i] - expected).abs() < 0.2,
                "Linear trend should be approximately preserved"
            );
        }
    }

    #[test]
    fn test_nw_gaussian_vs_epanechnikov() {
        let x = uniform_grid(30);
        let y: Vec<f64> = x
            .iter()
            .map(|&xi| (2.0 * std::f64::consts::PI * xi).sin())
            .collect();

        let y_gauss = nadaraya_watson(&x, &y, &x, 0.1, "gaussian");
        let y_epan = nadaraya_watson(&x, &y, &x, 0.1, "epanechnikov");

        // Both should produce valid output
        assert_eq!(y_gauss.len(), 30);
        assert_eq!(y_epan.len(), 30);

        // They should be different (different kernels)
        let diff: f64 = y_gauss
            .iter()
            .zip(&y_epan)
            .map(|(a, b)| (a - b).abs())
            .sum();
        assert!(
            diff > 0.0,
            "Different kernels should give different results"
        );
    }

    #[test]
    fn test_nw_invalid_input() {
        // Empty input
        let result = nadaraya_watson(&[], &[], &[0.5], 0.1, "gaussian");
        assert_eq!(result, vec![0.0]);

        // Mismatched lengths
        let result = nadaraya_watson(&[0.0, 1.0], &[1.0], &[0.5], 0.1, "gaussian");
        assert_eq!(result, vec![0.0]);

        // Zero bandwidth
        let result = nadaraya_watson(&[0.0, 1.0], &[1.0, 2.0], &[0.5], 0.0, "gaussian");
        assert_eq!(result, vec![0.0]);
    }

    // ============== Local linear tests ==============

    #[test]
    fn test_ll_constant_data() {
        let x = uniform_grid(20);
        let y: Vec<f64> = vec![3.0; 20];

        let y_smooth = local_linear(&x, &y, &x, 0.15, "gaussian");

        for &yi in &y_smooth {
            assert!((yi - 3.0).abs() < 0.1, "Constant should remain constant");
        }
    }

    #[test]
    fn test_ll_linear_data_exact() {
        let x = uniform_grid(30);
        let y: Vec<f64> = x.iter().map(|&xi| 3.0 * xi + 2.0).collect();

        let y_smooth = local_linear(&x, &y, &x, 0.2, "gaussian");

        // Local linear should fit linear data exactly (in interior)
        for i in 5..25 {
            let expected = 3.0 * x[i] + 2.0;
            assert!(
                (y_smooth[i] - expected).abs() < 0.1,
                "Local linear should fit linear data well"
            );
        }
    }

    #[test]
    fn test_ll_invalid_input() {
        let result = local_linear(&[], &[], &[0.5], 0.1, "gaussian");
        assert_eq!(result, vec![0.0]);

        // Negative bandwidth is rejected the same way as zero.
        let result = local_linear(&[0.0, 1.0], &[1.0, 2.0], &[0.5], -0.1, "gaussian");
        assert_eq!(result, vec![0.0]);
    }

    // ============== Local polynomial tests ==============

    #[test]
    fn test_lp_degree1_equals_local_linear() {
        let x = uniform_grid(25);
        let y: Vec<f64> = x.iter().map(|&xi| xi * xi).collect();

        let y_ll = local_linear(&x, &y, &x, 0.15, "gaussian");
        let y_lp = local_polynomial(&x, &y, &x, 0.15, 1, "gaussian");

        // degree == 1 delegates to local_linear, so results must match exactly.
        for i in 0..25 {
            assert!(
                (y_ll[i] - y_lp[i]).abs() < 1e-10,
                "Degree 1 should equal local linear"
            );
        }
    }

    #[test]
    fn test_lp_quadratic_data() {
        let x = uniform_grid(40);
        let y: Vec<f64> = x.iter().map(|&xi| xi * xi).collect();

        let y_smooth = local_polynomial(&x, &y, &x, 0.15, 2, "gaussian");

        // Local quadratic should fit quadratic data well in interior
        for i in 8..32 {
            let expected = x[i] * x[i];
            assert!(
                (y_smooth[i] - expected).abs() < 0.1,
                "Local quadratic should fit quadratic data"
            );
        }
    }

    #[test]
    fn test_lp_invalid_input() {
        // Zero degree
        let result = local_polynomial(&[0.0, 1.0], &[1.0, 2.0], &[0.5], 0.1, 0, "gaussian");
        assert_eq!(result, vec![0.0]);

        // Empty input
        let result = local_polynomial(&[], &[], &[0.5], 0.1, 2, "gaussian");
        assert_eq!(result, vec![0.0]);
    }

    // ============== KNN smoother tests ==============

    #[test]
    fn test_knn_k1_nearest() {
        let x = vec![0.0, 0.5, 1.0];
        let y = vec![1.0, 2.0, 3.0];

        let result = knn_smoother(&x, &y, &[0.1, 0.6, 0.9], 1);

        // k=1 should return the nearest neighbor's y value
        assert!((result[0] - 1.0).abs() < 1e-10, "0.1 nearest to 0.0 -> 1.0");
        assert!((result[1] - 2.0).abs() < 1e-10, "0.6 nearest to 0.5 -> 2.0");
        assert!((result[2] - 3.0).abs() < 1e-10, "0.9 nearest to 1.0 -> 3.0");
    }

    #[test]
    fn test_knn_k_equals_n_is_mean() {
        let x = vec![0.0, 0.25, 0.5, 0.75, 1.0];
        let y = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let expected_mean = 3.0;

        let result = knn_smoother(&x, &y, &[0.5], 5);

        assert!(
            (result[0] - expected_mean).abs() < 1e-10,
            "k=n should return mean"
        );
    }

    #[test]
    fn test_knn_invalid_input() {
        let result = knn_smoother(&[], &[], &[0.5], 3);
        assert_eq!(result, vec![0.0]);

        let result = knn_smoother(&[0.0, 1.0], &[1.0, 2.0], &[0.5], 0);
        assert_eq!(result, vec![0.0]);
    }

    // ============== Smoothing matrix tests ==============

    #[test]
    fn test_smoothing_matrix_row_stochastic() {
        let x = uniform_grid(10);
        let s = smoothing_matrix_nw(&x, 0.2, "gaussian");

        assert_eq!(s.len(), 100);

        // Each row should sum to 1 (row stochastic)
        // Entry (i, j) lives at s[i + j * n] (column-major).
        for i in 0..10 {
            let row_sum: f64 = (0..10).map(|j| s[i + j * 10]).sum();
            assert!(
                (row_sum - 1.0).abs() < 1e-10,
                "Row {} should sum to 1, got {}",
                i,
                row_sum
            );
        }
    }

    #[test]
    fn test_smoothing_matrix_invalid_input() {
        let result = smoothing_matrix_nw(&[], 0.1, "gaussian");
        assert!(result.is_empty());

        let result = smoothing_matrix_nw(&[0.0, 1.0], 0.0, "gaussian");
        assert!(result.is_empty());
    }
}