// fdars_core/smoothing.rs

1//! Smoothing functions for functional data.
2//!
3//! This module provides kernel-based smoothing methods including
4//! Nadaraya-Watson, local linear, and local polynomial regression.
5
6use crate::slice_maybe_parallel;
7#[cfg(feature = "parallel")]
8use rayon::iter::ParallelIterator;
9
/// Standard Gaussian (normal density) kernel: K(u) = exp(-u²/2) / √(2π).
fn gaussian_kernel(u: f64) -> f64 {
    let normalizer = (2.0 * std::f64::consts::PI).sqrt();
    f64::exp(-0.5 * u * u) / normalizer
}
14
/// Epanechnikov kernel: K(u) = 0.75·(1 − u²) on |u| ≤ 1, zero outside.
fn epanechnikov_kernel(u: f64) -> f64 {
    match u.abs() <= 1.0 {
        true => 0.75 * (1.0 - u * u),
        false => 0.0,
    }
}
23
24/// Get kernel function by name.
25fn get_kernel(kernel_type: &str) -> fn(f64) -> f64 {
26    match kernel_type.to_lowercase().as_str() {
27        "epanechnikov" | "epan" => epanechnikov_kernel,
28        _ => gaussian_kernel,
29    }
30}
31
32/// Nadaraya-Watson kernel smoother.
33///
34/// # Arguments
35/// * `x` - Predictor values
36/// * `y` - Response values
37/// * `x_new` - Points at which to evaluate the smoother
38/// * `bandwidth` - Kernel bandwidth
39/// * `kernel` - Kernel type ("gaussian" or "epanechnikov")
40///
41/// # Returns
42/// Smoothed values at x_new
43pub fn nadaraya_watson(
44    x: &[f64],
45    y: &[f64],
46    x_new: &[f64],
47    bandwidth: f64,
48    kernel: &str,
49) -> Vec<f64> {
50    let n = x.len();
51    if n == 0 || y.len() != n || x_new.is_empty() || bandwidth <= 0.0 {
52        return vec![0.0; x_new.len()];
53    }
54
55    let kernel_fn = get_kernel(kernel);
56
57    slice_maybe_parallel!(x_new)
58        .map(|&x0| {
59            let mut num = 0.0;
60            let mut denom = 0.0;
61
62            for i in 0..n {
63                let u = (x[i] - x0) / bandwidth;
64                let w = kernel_fn(u);
65                num += w * y[i];
66                denom += w;
67            }
68
69            if denom > 1e-10 {
70                num / denom
71            } else {
72                0.0
73            }
74        })
75        .collect()
76}
77
78/// Local linear regression smoother.
79///
80/// # Arguments
81/// * `x` - Predictor values
82/// * `y` - Response values
83/// * `x_new` - Points at which to evaluate the smoother
84/// * `bandwidth` - Kernel bandwidth
85/// * `kernel` - Kernel type
86///
87/// # Returns
88/// Smoothed values at x_new
89pub fn local_linear(x: &[f64], y: &[f64], x_new: &[f64], bandwidth: f64, kernel: &str) -> Vec<f64> {
90    let n = x.len();
91    if n == 0 || y.len() != n || x_new.is_empty() || bandwidth <= 0.0 {
92        return vec![0.0; x_new.len()];
93    }
94
95    let kernel_fn = get_kernel(kernel);
96
97    slice_maybe_parallel!(x_new)
98        .map(|&x0| {
99            // Compute weighted moments
100            let mut s0 = 0.0;
101            let mut s1 = 0.0;
102            let mut s2 = 0.0;
103            let mut t0 = 0.0;
104            let mut t1 = 0.0;
105
106            for i in 0..n {
107                let u = (x[i] - x0) / bandwidth;
108                let w = kernel_fn(u);
109                let d = x[i] - x0;
110
111                s0 += w;
112                s1 += w * d;
113                s2 += w * d * d;
114                t0 += w * y[i];
115                t1 += w * y[i] * d;
116            }
117
118            // Solve local linear regression
119            let det = s0 * s2 - s1 * s1;
120            if det.abs() > 1e-10 {
121                (s2 * t0 - s1 * t1) / det
122            } else if s0 > 1e-10 {
123                t0 / s0
124            } else {
125                0.0
126            }
127        })
128        .collect()
129}
130
/// Local polynomial regression smoother.
///
/// At each evaluation point `x0`, fits a weighted least-squares polynomial
/// of the given degree in d = xᵢ − x0; the fitted intercept is the smoothed
/// value at `x0`.
///
/// # Arguments
/// * `x` - Predictor values
/// * `y` - Response values
/// * `x_new` - Points at which to evaluate the smoother
/// * `bandwidth` - Kernel bandwidth
/// * `degree` - Polynomial degree (degree 1 delegates to [`local_linear`];
///   degree 0 is treated as invalid input and yields zeros)
/// * `kernel` - Kernel type
///
/// # Returns
/// Smoothed values at x_new; all zeros on invalid input (empty or
/// mismatched slices, non-positive bandwidth, or zero degree).
pub fn local_polynomial(
    x: &[f64],
    y: &[f64],
    x_new: &[f64],
    bandwidth: f64,
    degree: usize,
    kernel: &str,
) -> Vec<f64> {
    let n = x.len();
    if n == 0 || y.len() != n || x_new.is_empty() || bandwidth <= 0.0 || degree == 0 {
        return vec![0.0; x_new.len()];
    }

    // Degree 1 has a cheap closed-form solution; reuse it.
    if degree == 1 {
        return local_linear(x, y, x_new, bandwidth, kernel);
    }

    let kernel_fn = get_kernel(kernel);
    let p = degree + 1; // Number of coefficients

    slice_maybe_parallel!(x_new)
        .map(|&x0| {
            // Build weighted design matrix and response
            // (xtx is p x p row-major; xty is length p).
            let mut xtx = vec![0.0; p * p];
            let mut xty = vec![0.0; p];

            for i in 0..n {
                let u = (x[i] - x0) / bandwidth;
                let w = kernel_fn(u);
                let d = x[i] - x0;

                // Build powers of d: powers[j] = d^j, with powers[0] = 1.
                let mut powers = vec![1.0; p];
                for j in 1..p {
                    powers[j] = powers[j - 1] * d;
                }

                // Accumulate X'WX and X'Wy
                for j in 0..p {
                    for k in 0..p {
                        xtx[j * p + k] += w * powers[j] * powers[k];
                    }
                    xty[j] += w * powers[j] * y[i];
                }
            }

            // Solve using simple Gaussian elimination (for small p)
            // For numerical stability in real applications, use proper linear algebra
            // This is a simplified implementation
            let mut a = xtx.clone();
            let mut b = xty.clone();

            for i in 0..p {
                // Find pivot (partial pivoting: largest |entry| in column i).
                let mut max_idx = i;
                for j in (i + 1)..p {
                    if a[j * p + i].abs() > a[max_idx * p + i].abs() {
                        max_idx = j;
                    }
                }

                // Swap rows
                if max_idx != i {
                    for k in 0..p {
                        a.swap(i * p + k, max_idx * p + k);
                    }
                    b.swap(i, max_idx);
                }

                let pivot = a[i * p + i];
                // Near-zero pivot: skip elimination for this column; the
                // matching guard in back substitution leaves that
                // coefficient at 0 (degraded but non-panicking fallback).
                if pivot.abs() < 1e-10 {
                    continue;
                }

                // Eliminate
                for j in (i + 1)..p {
                    let factor = a[j * p + i] / pivot;
                    for k in i..p {
                        a[j * p + k] -= factor * a[i * p + k];
                    }
                    b[j] -= factor * b[i];
                }
            }

            // Back substitution
            let mut coefs = vec![0.0; p];
            for i in (0..p).rev() {
                let mut sum = b[i];
                for j in (i + 1)..p {
                    sum -= a[i * p + j] * coefs[j];
                }
                if a[i * p + i].abs() > 1e-10 {
                    coefs[i] = sum / a[i * p + i];
                }
            }

            coefs[0] // Return intercept (fitted value at x0)
        })
        .collect()
}
243
244/// k-Nearest Neighbors smoother.
245///
246/// # Arguments
247/// * `x` - Predictor values
248/// * `y` - Response values
249/// * `x_new` - Points at which to evaluate the smoother
250/// * `k` - Number of neighbors
251///
252/// # Returns
253/// Smoothed values at x_new
254pub fn knn_smoother(x: &[f64], y: &[f64], x_new: &[f64], k: usize) -> Vec<f64> {
255    let n = x.len();
256    if n == 0 || y.len() != n || x_new.is_empty() || k == 0 {
257        return vec![0.0; x_new.len()];
258    }
259
260    let k = k.min(n);
261
262    slice_maybe_parallel!(x_new)
263        .map(|&x0| {
264            // Compute distances
265            let mut distances: Vec<(usize, f64)> = x
266                .iter()
267                .enumerate()
268                .map(|(i, &xi)| (i, (xi - x0).abs()))
269                .collect();
270
271            // Partial sort to get k nearest
272            distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
273
274            // Average of k nearest neighbors
275            let sum: f64 = distances.iter().take(k).map(|(i, _)| y[*i]).sum();
276            sum / k as f64
277        })
278        .collect()
279}
280
281/// Compute smoothing matrix for Nadaraya-Watson.
282///
283/// Returns the smoother matrix S such that y_hat = S * y.
284pub fn smoothing_matrix_nw(x: &[f64], bandwidth: f64, kernel: &str) -> Vec<f64> {
285    let n = x.len();
286    if n == 0 || bandwidth <= 0.0 {
287        return Vec::new();
288    }
289
290    let kernel_fn = get_kernel(kernel);
291    let mut s = vec![0.0; n * n];
292
293    for i in 0..n {
294        let mut row_sum = 0.0;
295        for j in 0..n {
296            let u = (x[j] - x[i]) / bandwidth;
297            s[i + j * n] = kernel_fn(u);
298            row_sum += s[i + j * n];
299        }
300        if row_sum > 1e-10 {
301            for j in 0..n {
302                s[i + j * n] /= row_sum;
303            }
304        }
305    }
306
307    s
308}
309
// Unit tests covering each smoother's happy path, kernel choice, and
// invalid-input fallbacks (which all return zeros of the requested length).
#[cfg(test)]
mod tests {
    use super::*;

    // Evenly spaced grid of n points on [0, 1].
    // NOTE(review): assumes n >= 2 — n == 1 divides 0 by 0 (NaN) and
    // n == 0 underflows `n - 1`; every caller below uses n >= 3.
    fn uniform_grid(n: usize) -> Vec<f64> {
        (0..n).map(|i| i as f64 / (n - 1) as f64).collect()
    }

    // ============== Nadaraya-Watson tests ==============

    #[test]
    fn test_nw_constant_data() {
        let x = uniform_grid(20);
        let y: Vec<f64> = vec![5.0; 20];

        let y_smooth = nadaraya_watson(&x, &y, &x, 0.1, "gaussian");

        // Smoothing constant data should return constant
        for &yi in &y_smooth {
            assert!(
                (yi - 5.0).abs() < 0.1,
                "Constant data should remain constant"
            );
        }
    }

    #[test]
    fn test_nw_linear_data() {
        let x = uniform_grid(50);
        let y: Vec<f64> = x.iter().map(|&xi| 2.0 * xi + 1.0).collect();

        let y_smooth = nadaraya_watson(&x, &y, &x, 0.2, "gaussian");

        // Linear data should be approximately preserved (with some edge effects)
        // — hence only interior indices 10..40 are checked.
        for i in 10..40 {
            let expected = 2.0 * x[i] + 1.0;
            assert!(
                (y_smooth[i] - expected).abs() < 0.2,
                "Linear trend should be approximately preserved"
            );
        }
    }

    #[test]
    fn test_nw_gaussian_vs_epanechnikov() {
        let x = uniform_grid(30);
        let y: Vec<f64> = x
            .iter()
            .map(|&xi| (2.0 * std::f64::consts::PI * xi).sin())
            .collect();

        let y_gauss = nadaraya_watson(&x, &y, &x, 0.1, "gaussian");
        let y_epan = nadaraya_watson(&x, &y, &x, 0.1, "epanechnikov");

        // Both should produce valid output
        assert_eq!(y_gauss.len(), 30);
        assert_eq!(y_epan.len(), 30);

        // They should be different (different kernels)
        let diff: f64 = y_gauss
            .iter()
            .zip(&y_epan)
            .map(|(a, b)| (a - b).abs())
            .sum();
        assert!(
            diff > 0.0,
            "Different kernels should give different results"
        );
    }

    #[test]
    fn test_nw_invalid_input() {
        // Empty input
        let result = nadaraya_watson(&[], &[], &[0.5], 0.1, "gaussian");
        assert_eq!(result, vec![0.0]);

        // Mismatched lengths
        let result = nadaraya_watson(&[0.0, 1.0], &[1.0], &[0.5], 0.1, "gaussian");
        assert_eq!(result, vec![0.0]);

        // Zero bandwidth
        let result = nadaraya_watson(&[0.0, 1.0], &[1.0, 2.0], &[0.5], 0.0, "gaussian");
        assert_eq!(result, vec![0.0]);
    }

    // ============== Local linear tests ==============

    #[test]
    fn test_ll_constant_data() {
        let x = uniform_grid(20);
        let y: Vec<f64> = vec![3.0; 20];

        let y_smooth = local_linear(&x, &y, &x, 0.15, "gaussian");

        for &yi in &y_smooth {
            assert!((yi - 3.0).abs() < 0.1, "Constant should remain constant");
        }
    }

    #[test]
    fn test_ll_linear_data_exact() {
        let x = uniform_grid(30);
        let y: Vec<f64> = x.iter().map(|&xi| 3.0 * xi + 2.0).collect();

        let y_smooth = local_linear(&x, &y, &x, 0.2, "gaussian");

        // Local linear should fit linear data exactly (in interior)
        for i in 5..25 {
            let expected = 3.0 * x[i] + 2.0;
            assert!(
                (y_smooth[i] - expected).abs() < 0.1,
                "Local linear should fit linear data well"
            );
        }
    }

    #[test]
    fn test_ll_invalid_input() {
        let result = local_linear(&[], &[], &[0.5], 0.1, "gaussian");
        assert_eq!(result, vec![0.0]);

        // Negative bandwidth is rejected the same way as zero.
        let result = local_linear(&[0.0, 1.0], &[1.0, 2.0], &[0.5], -0.1, "gaussian");
        assert_eq!(result, vec![0.0]);
    }

    // ============== Local polynomial tests ==============

    #[test]
    fn test_lp_degree1_equals_local_linear() {
        let x = uniform_grid(25);
        let y: Vec<f64> = x.iter().map(|&xi| xi * xi).collect();

        let y_ll = local_linear(&x, &y, &x, 0.15, "gaussian");
        let y_lp = local_polynomial(&x, &y, &x, 0.15, 1, "gaussian");

        // Degree 1 delegates to local_linear, so results match to 1e-10.
        for i in 0..25 {
            assert!(
                (y_ll[i] - y_lp[i]).abs() < 1e-10,
                "Degree 1 should equal local linear"
            );
        }
    }

    #[test]
    fn test_lp_quadratic_data() {
        let x = uniform_grid(40);
        let y: Vec<f64> = x.iter().map(|&xi| xi * xi).collect();

        let y_smooth = local_polynomial(&x, &y, &x, 0.15, 2, "gaussian");

        // Local quadratic should fit quadratic data well in interior
        for i in 8..32 {
            let expected = x[i] * x[i];
            assert!(
                (y_smooth[i] - expected).abs() < 0.1,
                "Local quadratic should fit quadratic data"
            );
        }
    }

    #[test]
    fn test_lp_invalid_input() {
        // Zero degree
        let result = local_polynomial(&[0.0, 1.0], &[1.0, 2.0], &[0.5], 0.1, 0, "gaussian");
        assert_eq!(result, vec![0.0]);

        // Empty input
        let result = local_polynomial(&[], &[], &[0.5], 0.1, 2, "gaussian");
        assert_eq!(result, vec![0.0]);
    }

    // ============== KNN smoother tests ==============

    #[test]
    fn test_knn_k1_nearest() {
        let x = vec![0.0, 0.5, 1.0];
        let y = vec![1.0, 2.0, 3.0];

        let result = knn_smoother(&x, &y, &[0.1, 0.6, 0.9], 1);

        // k=1 should return the nearest neighbor's y value
        assert!((result[0] - 1.0).abs() < 1e-10, "0.1 nearest to 0.0 -> 1.0");
        assert!((result[1] - 2.0).abs() < 1e-10, "0.6 nearest to 0.5 -> 2.0");
        assert!((result[2] - 3.0).abs() < 1e-10, "0.9 nearest to 1.0 -> 3.0");
    }

    #[test]
    fn test_knn_k_equals_n_is_mean() {
        let x = vec![0.0, 0.25, 0.5, 0.75, 1.0];
        let y = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let expected_mean = 3.0;

        let result = knn_smoother(&x, &y, &[0.5], 5);

        assert!(
            (result[0] - expected_mean).abs() < 1e-10,
            "k=n should return mean"
        );
    }

    #[test]
    fn test_knn_invalid_input() {
        let result = knn_smoother(&[], &[], &[0.5], 3);
        assert_eq!(result, vec![0.0]);

        // k == 0 is invalid and yields zeros.
        let result = knn_smoother(&[0.0, 1.0], &[1.0, 2.0], &[0.5], 0);
        assert_eq!(result, vec![0.0]);
    }

    // ============== Smoothing matrix tests ==============

    #[test]
    fn test_smoothing_matrix_row_stochastic() {
        let x = uniform_grid(10);
        let s = smoothing_matrix_nw(&x, 0.2, "gaussian");

        assert_eq!(s.len(), 100);

        // Each row should sum to 1 (row stochastic);
        // entry (i, j) is stored column-major at s[i + j * n].
        for i in 0..10 {
            let row_sum: f64 = (0..10).map(|j| s[i + j * 10]).sum();
            assert!(
                (row_sum - 1.0).abs() < 1e-10,
                "Row {} should sum to 1, got {}",
                i,
                row_sum
            );
        }
    }

    #[test]
    fn test_smoothing_matrix_invalid_input() {
        let result = smoothing_matrix_nw(&[], 0.1, "gaussian");
        assert!(result.is_empty());

        let result = smoothing_matrix_nw(&[0.0, 1.0], 0.0, "gaussian");
        assert!(result.is_empty());
    }
}