1use crate::error::FdarError;
7use crate::slice_maybe_parallel;
8#[cfg(feature = "parallel")]
9use rayon::iter::ParallelIterator;
10
/// Standard normal (Gaussian) kernel: `exp(-u^2 / 2) / sqrt(2*pi)`.
fn gaussian_kernel(u: f64) -> f64 {
    let normalizer = (2.0 * std::f64::consts::PI).sqrt();
    f64::exp(-0.5 * u * u) / normalizer
}
15
/// Epanechnikov kernel: `0.75 * (1 - u^2)` on `|u| <= 1`, zero outside the support.
fn epanechnikov_kernel(u: f64) -> f64 {
    let inside_support = u.abs() <= 1.0;
    if inside_support {
        (1.0 - u * u) * 0.75
    } else {
        0.0
    }
}
24
/// Tricube kernel: `(1 - |u|^3)^3` on `|u| < 1`, zero outside the support.
fn tricube_kernel(u: f64) -> f64 {
    let t = u.abs();
    if t < 1.0 {
        let core = 1.0 - t.powi(3);
        core.powi(3)
    } else {
        0.0
    }
}
34
35fn get_kernel(kernel_type: &str) -> fn(f64) -> f64 {
37 match kernel_type.to_lowercase().as_str() {
38 "epanechnikov" | "epan" => epanechnikov_kernel,
39 "tricube" | "tri-cube" => tricube_kernel,
40 _ => gaussian_kernel,
41 }
42}
43
44pub fn nadaraya_watson(
73 x: &[f64],
74 y: &[f64],
75 x_new: &[f64],
76 bandwidth: f64,
77 kernel: &str,
78) -> Result<Vec<f64>, FdarError> {
79 let n = x.len();
80 if n == 0 {
81 return Err(FdarError::InvalidDimension {
82 parameter: "x",
83 expected: "non-empty slice".to_string(),
84 actual: "empty".to_string(),
85 });
86 }
87 if y.len() != n {
88 return Err(FdarError::InvalidDimension {
89 parameter: "y",
90 expected: format!("length {n} (matching x)"),
91 actual: format!("length {}", y.len()),
92 });
93 }
94 if x_new.is_empty() {
95 return Err(FdarError::InvalidDimension {
96 parameter: "x_new",
97 expected: "non-empty slice".to_string(),
98 actual: "empty".to_string(),
99 });
100 }
101 if bandwidth <= 0.0 {
102 return Err(FdarError::InvalidParameter {
103 parameter: "bandwidth",
104 message: format!("must be positive, got {bandwidth}"),
105 });
106 }
107
108 let kernel_fn = get_kernel(kernel);
109
110 Ok(slice_maybe_parallel!(x_new)
111 .map(|&x0| {
112 let mut num = 0.0;
113 let mut denom = 0.0;
114
115 for i in 0..n {
116 let u = (x[i] - x0) / bandwidth;
117 let w = kernel_fn(u);
118 num += w * y[i];
119 denom += w;
120 }
121
122 if denom > 1e-10 {
123 num / denom
124 } else {
125 0.0
126 }
127 })
128 .collect())
129}
130
131pub fn local_linear(
161 x: &[f64],
162 y: &[f64],
163 x_new: &[f64],
164 bandwidth: f64,
165 kernel: &str,
166) -> Result<Vec<f64>, FdarError> {
167 let n = x.len();
168 if n == 0 {
169 return Err(FdarError::InvalidDimension {
170 parameter: "x",
171 expected: "non-empty slice".to_string(),
172 actual: "empty".to_string(),
173 });
174 }
175 if y.len() != n {
176 return Err(FdarError::InvalidDimension {
177 parameter: "y",
178 expected: format!("length {n} (matching x)"),
179 actual: format!("length {}", y.len()),
180 });
181 }
182 if x_new.is_empty() {
183 return Err(FdarError::InvalidDimension {
184 parameter: "x_new",
185 expected: "non-empty slice".to_string(),
186 actual: "empty".to_string(),
187 });
188 }
189 if bandwidth <= 0.0 {
190 return Err(FdarError::InvalidParameter {
191 parameter: "bandwidth",
192 message: format!("must be positive, got {bandwidth}"),
193 });
194 }
195
196 let kernel_fn = get_kernel(kernel);
197
198 Ok(slice_maybe_parallel!(x_new)
199 .map(|&x0| {
200 let mut s0 = 0.0;
202 let mut s1 = 0.0;
203 let mut s2 = 0.0;
204 let mut t0 = 0.0;
205 let mut t1 = 0.0;
206
207 for i in 0..n {
208 let u = (x[i] - x0) / bandwidth;
209 let w = kernel_fn(u);
210 let d = x[i] - x0;
211
212 s0 += w;
213 s1 += w * d;
214 s2 += w * d * d;
215 t0 += w * y[i];
216 t1 += w * y[i] * d;
217 }
218
219 let det = s0 * s2 - s1 * s1;
221 if det.abs() > 1e-10 {
222 (s2 * t0 - s1 * t1) / det
223 } else if s0 > 1e-10 {
224 t0 / s0
225 } else {
226 0.0
227 }
228 })
229 .collect())
230}
231
/// Accumulate the kernel-weighted normal equations `X'WX` (row-major `p x p`)
/// and `X'Wy` (`p`) for a degree-(p-1) local polynomial fit centered at `x0`,
/// where column `j` of the design is `(x_i - x0)^j`.
///
/// Powers of `d = x_i - x0` are built incrementally per observation instead of
/// calling `powi(j)` inside the O(p^2) inner loops — the asymptotic cost drops
/// from O(n * p^2) power evaluations to O(n * p) multiplications. For j >= 4
/// the sequential products may differ from `powi` at ulp level (different
/// association order); the least-squares solution is insensitive to this.
fn accumulate_weighted_normal_equations(
    x: &[f64],
    y: &[f64],
    x0: f64,
    bandwidth: f64,
    p: usize,
    kernel_fn: impl Fn(f64) -> f64,
) -> (Vec<f64>, Vec<f64>) {
    let mut xtx = vec![0.0; p * p];
    let mut xty = vec![0.0; p];
    // powers[j] == d^j for the current observation; powers[0] stays 1.0.
    let mut powers = vec![1.0; p];

    for (&xi, &yi) in x.iter().zip(y.iter()) {
        let d = xi - x0;
        let w = kernel_fn(d / bandwidth);

        for j in 1..p {
            powers[j] = powers[j - 1] * d;
        }

        for j in 0..p {
            let w_dj = w * powers[j];
            for k in 0..p {
                xtx[j * p + k] += w_dj * powers[k];
            }
            xty[j] += w_dj * yi;
        }
    }

    (xtx, xty)
}
261
/// Partial-pivot search: index of the row (at or below `col`) with the
/// largest absolute value in column `col` of the row-major `p x p` matrix `a`.
fn find_pivot(a: &[f64], p: usize, col: usize) -> usize {
    let mut best_row = col;
    let mut best_abs = a[col * p + col].abs();
    for row in (col + 1)..p {
        let candidate = a[row * p + col].abs();
        if candidate > best_abs {
            best_row = row;
            best_abs = candidate;
        }
    }
    best_row
}
275
/// Exchange rows `row_a` and `row_b` of the row-major `p x p` matrix `a`,
/// mirroring the swap in the right-hand-side vector `b`.
fn swap_rows(a: &mut [f64], b: &mut [f64], p: usize, row_a: usize, row_b: usize) {
    let (base_a, base_b) = (row_a * p, row_b * p);
    for k in 0..p {
        a.swap(base_a + k, base_b + k);
    }
    b.swap(row_a, row_b);
}
283
/// Zero out column `pivot_row` in every row below `pivot_row` by subtracting
/// the scaled pivot row; the right-hand side `b` receives the same updates.
fn eliminate_below(a: &mut [f64], b: &mut [f64], p: usize, pivot_row: usize) {
    let pivot = a[pivot_row * p + pivot_row];
    for row in (pivot_row + 1)..p {
        let factor = a[row * p + pivot_row] / pivot;
        // RHS update is independent of the matrix update below.
        b[row] -= factor * b[pivot_row];
        for col in pivot_row..p {
            a[row * p + col] -= factor * a[pivot_row * p + col];
        }
    }
}
295
296fn forward_eliminate(a: &mut [f64], b: &mut [f64], p: usize) {
297 for i in 0..p {
298 let max_idx = find_pivot(a, p, i);
299 if max_idx != i {
300 swap_rows(a, b, p, i, max_idx);
301 }
302
303 if a[i * p + i].abs() < 1e-10 {
304 continue;
305 }
306
307 eliminate_below(a, b, p, i);
308 }
309}
310
/// Back-substitution on an upper-triangular system. Unknowns whose diagonal
/// entry is numerically zero are left at 0.0.
fn back_substitute(a: &[f64], b: &[f64], p: usize) -> Vec<f64> {
    let mut coefs = vec![0.0; p];
    for row in (0..p).rev() {
        // Subtract the already-solved trailing terms from the RHS.
        let mut acc = b[row];
        for col in (row + 1)..p {
            acc -= a[row * p + col] * coefs[col];
        }
        let diag = a[row * p + row];
        if diag.abs() > 1e-10 {
            coefs[row] = acc / diag;
        }
    }
    coefs
}
325
326fn solve_gaussian(a: &mut [f64], b: &mut [f64], p: usize) -> Vec<f64> {
327 forward_eliminate(a, b, p);
328 back_substitute(a, b, p)
329}
330
/// Public wrapper around the crate-internal Gaussian-elimination solver:
/// solves the row-major `p x p` system `a * x = b` with partial pivoting.
/// Note that `a` and `b` are modified in place during elimination.
pub fn solve_gaussian_pub(a: &mut [f64], b: &mut [f64], p: usize) -> Vec<f64> {
    solve_gaussian(a, b, p)
}
339
340pub fn local_polynomial(
358 x: &[f64],
359 y: &[f64],
360 x_new: &[f64],
361 bandwidth: f64,
362 degree: usize,
363 kernel: &str,
364) -> Result<Vec<f64>, FdarError> {
365 let n = x.len();
366 if n == 0 {
367 return Err(FdarError::InvalidDimension {
368 parameter: "x",
369 expected: "non-empty slice".to_string(),
370 actual: "empty".to_string(),
371 });
372 }
373 if y.len() != n {
374 return Err(FdarError::InvalidDimension {
375 parameter: "y",
376 expected: format!("length {n} (matching x)"),
377 actual: format!("length {}", y.len()),
378 });
379 }
380 if x_new.is_empty() {
381 return Err(FdarError::InvalidDimension {
382 parameter: "x_new",
383 expected: "non-empty slice".to_string(),
384 actual: "empty".to_string(),
385 });
386 }
387 if bandwidth <= 0.0 {
388 return Err(FdarError::InvalidParameter {
389 parameter: "bandwidth",
390 message: format!("must be positive, got {bandwidth}"),
391 });
392 }
393 if degree == 0 {
394 return nadaraya_watson(x, y, x_new, bandwidth, kernel);
395 }
396
397 if degree == 1 {
398 return local_linear(x, y, x_new, bandwidth, kernel);
399 }
400
401 let kernel_fn = get_kernel(kernel);
402 let p = degree + 1; Ok(slice_maybe_parallel!(x_new)
405 .map(|&x0| {
406 let (mut xtx, mut xty) =
407 accumulate_weighted_normal_equations(x, y, x0, bandwidth, p, kernel_fn);
408 let coefs = solve_gaussian(&mut xtx, &mut xty, p);
409 coefs[0]
410 })
411 .collect())
412}
413
414pub fn knn_smoother(x: &[f64], y: &[f64], x_new: &[f64], k: usize) -> Result<Vec<f64>, FdarError> {
430 let n = x.len();
431 if n == 0 {
432 return Err(FdarError::InvalidDimension {
433 parameter: "x",
434 expected: "non-empty slice".to_string(),
435 actual: "empty".to_string(),
436 });
437 }
438 if y.len() != n {
439 return Err(FdarError::InvalidDimension {
440 parameter: "y",
441 expected: format!("length {n} (matching x)"),
442 actual: format!("length {}", y.len()),
443 });
444 }
445 if x_new.is_empty() {
446 return Err(FdarError::InvalidDimension {
447 parameter: "x_new",
448 expected: "non-empty slice".to_string(),
449 actual: "empty".to_string(),
450 });
451 }
452 if k == 0 {
453 return Err(FdarError::InvalidParameter {
454 parameter: "k",
455 message: "must be at least 1".to_string(),
456 });
457 }
458
459 let k = k.min(n);
460
461 Ok(slice_maybe_parallel!(x_new)
462 .map(|&x0| {
463 let mut distances: Vec<(usize, f64)> = x
465 .iter()
466 .enumerate()
467 .map(|(i, &xi)| (i, (xi - x0).abs()))
468 .collect();
469
470 distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
472
473 let sum: f64 = distances.iter().take(k).map(|(i, _)| y[*i]).sum();
475 sum / k as f64
476 })
477 .collect())
478}
479
480pub fn smoothing_matrix_nw(x: &[f64], bandwidth: f64, kernel: &str) -> Result<Vec<f64>, FdarError> {
488 let n = x.len();
489 if n == 0 {
490 return Err(FdarError::InvalidDimension {
491 parameter: "x",
492 expected: "non-empty slice".to_string(),
493 actual: "empty".to_string(),
494 });
495 }
496 if bandwidth <= 0.0 {
497 return Err(FdarError::InvalidParameter {
498 parameter: "bandwidth",
499 message: format!("must be positive, got {bandwidth}"),
500 });
501 }
502
503 let kernel_fn = get_kernel(kernel);
504 let mut s = vec![0.0; n * n];
505
506 for i in 0..n {
507 let mut row_sum = 0.0;
508 for j in 0..n {
509 let u = (x[j] - x[i]) / bandwidth;
510 s[i + j * n] = kernel_fn(u);
511 row_sum += s[i + j * n];
512 }
513 if row_sum > 1e-10 {
514 for j in 0..n {
515 s[i + j * n] /= row_sum;
516 }
517 }
518 }
519
520 Ok(s)
521}
522
/// Bandwidth-selection criterion used by [`optim_bandwidth`].
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum CvCriterion {
    /// Leave-one-out cross-validation (scored by [`cv_smoother`]).
    Cv,
    /// Generalized cross-validation (scored by [`gcv_smoother`]).
    Gcv,
}
533
/// Outcome of a grid search over candidate bandwidths (see [`optim_bandwidth`]).
#[derive(Debug, Clone)]
pub struct OptimBandwidthResult {
    /// Bandwidth that achieved the lowest criterion score on the grid.
    pub h_opt: f64,
    /// Criterion that was minimized.
    pub criterion: CvCriterion,
    /// Criterion value attained at `h_opt`.
    pub value: f64,
}
544
545pub fn cv_smoother(x: &[f64], y: &[f64], bandwidth: f64, kernel: &str) -> f64 {
559 let n = x.len();
560 if n < 2 || y.len() != n || bandwidth <= 0.0 {
561 return f64::INFINITY;
562 }
563
564 let mut s = match smoothing_matrix_nw(x, bandwidth, kernel) {
566 Ok(s) => s,
567 Err(_) => return f64::INFINITY,
568 };
569
570 for i in 0..n {
572 s[i + i * n] = 0.0;
573 }
574
575 for i in 0..n {
577 let row_sum: f64 = (0..n).map(|j| s[i + j * n]).sum();
578 if row_sum > 1e-10 {
579 for j in 0..n {
580 s[i + j * n] /= row_sum;
581 }
582 }
583 }
584
585 let mut mse = 0.0;
587 for i in 0..n {
588 let y_hat: f64 = (0..n).map(|j| s[i + j * n] * y[j]).sum();
589 let resid = y[i] - y_hat;
590 mse += resid * resid;
591 }
592 mse / n as f64
593}
594
595pub fn gcv_smoother(x: &[f64], y: &[f64], bandwidth: f64, kernel: &str) -> f64 {
608 let n = x.len();
609 if n < 2 || y.len() != n || bandwidth <= 0.0 {
610 return f64::INFINITY;
611 }
612
613 let s = match smoothing_matrix_nw(x, bandwidth, kernel) {
614 Ok(s) => s,
615 Err(_) => return f64::INFINITY,
616 };
617
618 let mut rss = 0.0;
620 for i in 0..n {
621 let y_hat: f64 = (0..n).map(|j| s[i + j * n] * y[j]).sum();
622 let resid = y[i] - y_hat;
623 rss += resid * resid;
624 }
625
626 let trace_s: f64 = (0..n).map(|i| s[i + i * n]).sum();
628
629 let denom = 1.0 - trace_s / n as f64;
630 if denom.abs() < 1e-10 {
631 f64::INFINITY
632 } else {
633 (rss / n as f64) / (denom * denom)
634 }
635}
636
637pub fn optim_bandwidth(
663 x: &[f64],
664 y: &[f64],
665 h_range: Option<(f64, f64)>,
666 criterion: CvCriterion,
667 kernel: &str,
668 n_grid: usize,
669) -> OptimBandwidthResult {
670 let n = x.len();
671 let n_grid = n_grid.max(2);
672
673 let (h_min, h_max) = match h_range {
675 Some((lo, hi)) if lo > 0.0 && hi > lo => (lo, hi),
676 _ => {
677 let x_min = x.iter().copied().fold(f64::INFINITY, f64::min);
678 let x_max = x.iter().copied().fold(f64::NEG_INFINITY, f64::max);
679 let h_default = (x_max - x_min) / (n as f64).powf(0.2);
680 let h_default = h_default.max(1e-10);
681 (h_default / 5.0, h_default * 5.0)
682 }
683 };
684
685 let score_fn = match criterion {
686 CvCriterion::Cv => cv_smoother,
687 CvCriterion::Gcv => gcv_smoother,
688 };
689
690 let mut best_h = h_min;
691 let mut best_score = f64::INFINITY;
692
693 for i in 0..n_grid {
694 let h = h_min + (h_max - h_min) * i as f64 / (n_grid - 1) as f64;
695 let score = score_fn(x, y, h, kernel);
696 if score < best_score {
697 best_score = score;
698 best_h = h;
699 }
700 }
701
702 OptimBandwidthResult {
703 h_opt: best_h,
704 criterion,
705 value: best_score,
706 }
707}
708
/// Outcome of leave-one-out cross-validation over k-NN neighbourhood sizes
/// (see [`knn_gcv`]).
#[derive(Debug, Clone)]
pub struct KnnCvResult {
    /// Neighbourhood size with the smallest cross-validation error.
    pub optimal_k: usize,
    /// Mean squared LOO error for each candidate `k` (entry `k - 1`).
    pub cv_errors: Vec<f64>,
}
719
720pub fn knn_gcv(x: &[f64], y: &[f64], max_k: usize) -> KnnCvResult {
730 let n = x.len();
731 let max_k = max_k.min(n.saturating_sub(1)).max(1);
732
733 let mut sorted_neighbors: Vec<Vec<(usize, f64)>> = Vec::with_capacity(n);
735 for i in 0..n {
736 let mut dists: Vec<(usize, f64)> = (0..n)
737 .filter(|&j| j != i)
738 .map(|j| (j, (x[j] - x[i]).abs()))
739 .collect();
740 dists.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
741 sorted_neighbors.push(dists);
742 }
743
744 let mut cv_errors = Vec::with_capacity(max_k);
745
746 for k in 1..=max_k {
747 let mut mse = 0.0;
748 for i in 0..n {
749 let neighbors = &sorted_neighbors[i];
750 let d_k = if k <= neighbors.len() {
752 neighbors[k - 1].1
753 } else {
754 neighbors.last().map_or(1.0, |x| x.1)
755 };
756 let d_k1 = if k < neighbors.len() {
757 neighbors[k].1
758 } else {
759 d_k * 2.0
760 };
761 let h = (d_k + d_k1) / 2.0;
762 let h = h.max(1e-10);
763
764 let mut num = 0.0;
766 let mut den = 0.0;
767 for &(j, dist) in neighbors.iter().take(k) {
768 let u = dist / h;
769 let w = epanechnikov_kernel(u);
770 num += w * y[j];
771 den += w;
772 }
773 let y_hat = if den > 1e-10 { num / den } else { y[i] };
774 mse += (y[i] - y_hat).powi(2);
775 }
776 cv_errors.push(mse / n as f64);
777 }
778
779 let optimal_k = cv_errors
780 .iter()
781 .enumerate()
782 .min_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
783 .map_or(1, |(i, _)| i + 1);
784
785 KnnCvResult {
786 optimal_k,
787 cv_errors,
788 }
789}
790
/// Per-observation neighbourhood sizes chosen by local (pointwise) leave-one-
/// out validation: for each observation `i`, returns the `k` in `1..=max_k`
/// whose k-NN mean (excluding `i` itself) has the smallest absolute error
/// against `y[i]`. `max_k` is clamped to `[1, n-1]`.
pub fn knn_lcv(x: &[f64], y: &[f64], max_k: usize) -> Vec<usize> {
    let n = x.len();
    let max_k = max_k.min(n.saturating_sub(1)).max(1);

    let mut per_obs_k = vec![1usize; n];

    for i in 0..n {
        let mut neighbors: Vec<(usize, f64)> = (0..n)
            .filter(|&j| j != i)
            .map(|j| (j, (x[j] - x[i]).abs()))
            .collect();
        // total_cmp is a genuine total order, so the sort stays well defined
        // even if x contains NaN (partial_cmp + unwrap_or(Equal) can feed the
        // sort an inconsistent comparator). Identical order for non-NaN data.
        neighbors.sort_by(|a, b| a.1.total_cmp(&b.1));

        let mut best_k = 1;
        let mut best_err = f64::INFINITY;

        for k in 1..=max_k {
            // Unweighted mean of the k nearest responses.
            let sum: f64 = neighbors.iter().take(k).map(|&(j, _)| y[j]).sum();
            let y_hat = sum / k as f64;
            let err = (y[i] - y_hat).abs();
            // Strict `<` keeps the smallest k on ties.
            if err < best_err {
                best_err = err;
                best_k = k;
            }
        }
        per_obs_k[i] = best_k;
    }

    per_obs_k
}
835
#[cfg(test)]
mod tests {
    //! Unit tests for the smoothers, smoothing matrices, and bandwidth /
    //! neighbourhood-size selectors defined in this module.
    //! `uniform_grid(n)` comes from the crate's test helpers and is assumed
    //! to produce `n` evenly spaced abscissae — confirm range in
    //! `test_helpers` if asserting boundary behavior.
    use super::*;
    use crate::test_helpers::uniform_grid;

    // --- Nadaraya-Watson ---

    #[test]
    fn test_nw_constant_data() {
        let x = uniform_grid(20);
        let y: Vec<f64> = vec![5.0; 20];

        let y_smooth = nadaraya_watson(&x, &y, &x, 0.1, "gaussian").unwrap();

        for &yi in &y_smooth {
            assert!(
                (yi - 5.0).abs() < 0.1,
                "Constant data should remain constant"
            );
        }
    }

    #[test]
    fn test_nw_linear_data() {
        let x = uniform_grid(50);
        let y: Vec<f64> = x.iter().map(|&xi| 2.0 * xi + 1.0).collect();

        let y_smooth = nadaraya_watson(&x, &y, &x, 0.2, "gaussian").unwrap();

        // Interior points only — boundary bias is expected at the edges.
        for i in 10..40 {
            let expected = 2.0 * x[i] + 1.0;
            assert!(
                (y_smooth[i] - expected).abs() < 0.2,
                "Linear trend should be approximately preserved"
            );
        }
    }

    #[test]
    fn test_nw_gaussian_vs_epanechnikov() {
        let x = uniform_grid(30);
        let y: Vec<f64> = x
            .iter()
            .map(|&xi| (2.0 * std::f64::consts::PI * xi).sin())
            .collect();

        let y_gauss = nadaraya_watson(&x, &y, &x, 0.1, "gaussian").unwrap();
        let y_epan = nadaraya_watson(&x, &y, &x, 0.1, "epanechnikov").unwrap();

        assert_eq!(y_gauss.len(), 30);
        assert_eq!(y_epan.len(), 30);

        let diff: f64 = y_gauss
            .iter()
            .zip(&y_epan)
            .map(|(a, b)| (a - b).abs())
            .sum();
        assert!(
            diff > 0.0,
            "Different kernels should give different results"
        );
    }

    #[test]
    fn test_nw_invalid_input() {
        assert!(nadaraya_watson(&[], &[], &[0.5], 0.1, "gaussian").is_err());

        assert!(nadaraya_watson(&[0.0, 1.0], &[1.0], &[0.5], 0.1, "gaussian").is_err());

        assert!(nadaraya_watson(&[0.0, 1.0], &[1.0, 2.0], &[0.5], 0.0, "gaussian").is_err());

        assert!(nadaraya_watson(&[0.0, 1.0], &[1.0, 2.0], &[], 0.1, "gaussian").is_err());
    }

    // --- Local linear ---

    #[test]
    fn test_ll_constant_data() {
        let x = uniform_grid(20);
        let y: Vec<f64> = vec![3.0; 20];

        let y_smooth = local_linear(&x, &y, &x, 0.15, "gaussian").unwrap();

        for &yi in &y_smooth {
            assert!((yi - 3.0).abs() < 0.1, "Constant should remain constant");
        }
    }

    #[test]
    fn test_ll_linear_data_exact() {
        let x = uniform_grid(30);
        let y: Vec<f64> = x.iter().map(|&xi| 3.0 * xi + 2.0).collect();

        let y_smooth = local_linear(&x, &y, &x, 0.2, "gaussian").unwrap();

        // Local linear reproduces linear trends without boundary bias.
        for i in 5..25 {
            let expected = 3.0 * x[i] + 2.0;
            assert!(
                (y_smooth[i] - expected).abs() < 0.1,
                "Local linear should fit linear data well"
            );
        }
    }

    #[test]
    fn test_ll_invalid_input() {
        assert!(local_linear(&[], &[], &[0.5], 0.1, "gaussian").is_err());

        assert!(local_linear(&[0.0, 1.0], &[1.0, 2.0], &[0.5], -0.1, "gaussian").is_err());
    }

    // --- Local polynomial ---

    #[test]
    fn test_lp_degree1_equals_local_linear() {
        let x = uniform_grid(25);
        let y: Vec<f64> = x.iter().map(|&xi| xi * xi).collect();

        let y_ll = local_linear(&x, &y, &x, 0.15, "gaussian").unwrap();
        let y_lp = local_polynomial(&x, &y, &x, 0.15, 1, "gaussian").unwrap();

        for i in 0..25 {
            assert!(
                (y_ll[i] - y_lp[i]).abs() < 1e-10,
                "Degree 1 should equal local linear"
            );
        }
    }

    #[test]
    fn test_lp_quadratic_data() {
        let x = uniform_grid(40);
        let y: Vec<f64> = x.iter().map(|&xi| xi * xi).collect();

        let y_smooth = local_polynomial(&x, &y, &x, 0.15, 2, "gaussian").unwrap();

        // Interior points only — boundary effects at the edges.
        for i in 8..32 {
            let expected = x[i] * x[i];
            assert!(
                (y_smooth[i] - expected).abs() < 0.1,
                "Local quadratic should fit quadratic data"
            );
        }
    }

    #[test]
    fn test_lp_invalid_input() {
        // Degree 0 delegates to Nadaraya-Watson and must match it exactly.
        let result =
            local_polynomial(&[0.0, 1.0], &[1.0, 2.0], &[0.5], 0.1, 0, "gaussian").unwrap();
        let nw = nadaraya_watson(&[0.0, 1.0], &[1.0, 2.0], &[0.5], 0.1, "gaussian").unwrap();
        assert_eq!(result, nw);

        assert!(local_polynomial(&[], &[], &[0.5], 0.1, 2, "gaussian").is_err());
    }

    // --- k-NN smoother ---

    #[test]
    fn test_knn_k1_nearest() {
        let x = vec![0.0, 0.5, 1.0];
        let y = vec![1.0, 2.0, 3.0];

        let result = knn_smoother(&x, &y, &[0.1, 0.6, 0.9], 1).unwrap();

        assert!((result[0] - 1.0).abs() < 1e-10, "0.1 nearest to 0.0 -> 1.0");
        assert!((result[1] - 2.0).abs() < 1e-10, "0.6 nearest to 0.5 -> 2.0");
        assert!((result[2] - 3.0).abs() < 1e-10, "0.9 nearest to 1.0 -> 3.0");
    }

    #[test]
    fn test_knn_k_equals_n_is_mean() {
        let x = vec![0.0, 0.25, 0.5, 0.75, 1.0];
        let y = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let expected_mean = 3.0;

        let result = knn_smoother(&x, &y, &[0.5], 5).unwrap();

        assert!(
            (result[0] - expected_mean).abs() < 1e-10,
            "k=n should return mean"
        );
    }

    #[test]
    fn test_knn_invalid_input() {
        assert!(knn_smoother(&[], &[], &[0.5], 3).is_err());

        assert!(knn_smoother(&[0.0, 1.0], &[1.0, 2.0], &[0.5], 0).is_err());
    }

    // --- Smoothing matrix ---

    #[test]
    fn test_smoothing_matrix_row_stochastic() {
        let x = uniform_grid(10);
        let s = smoothing_matrix_nw(&x, 0.2, "gaussian").unwrap();

        assert_eq!(s.len(), 100);

        // Entry (i, j) lives at s[i + j * n] (column-major).
        for i in 0..10 {
            let row_sum: f64 = (0..10).map(|j| s[i + j * 10]).sum();
            assert!(
                (row_sum - 1.0).abs() < 1e-10,
                "Row {} should sum to 1, got {}",
                i,
                row_sum
            );
        }
    }

    #[test]
    fn test_smoothing_matrix_invalid_input() {
        assert!(smoothing_matrix_nw(&[], 0.1, "gaussian").is_err());

        assert!(smoothing_matrix_nw(&[0.0, 1.0], 0.0, "gaussian").is_err());
    }

    // --- Degenerate / awkward inputs ---

    #[test]
    fn test_nan_nw_no_panic() {
        let x = vec![0.0, 0.25, 0.5, 0.75, 1.0];
        let mut y = vec![0.0, 1.0, 2.0, 1.0, 0.0];
        y[2] = f64::NAN;
        let result = nadaraya_watson(&x, &y, &x, 0.3, "gaussian").unwrap();
        assert_eq!(result.len(), x.len());
    }

    #[test]
    fn test_n1_smoother() {
        // A single observation: the smoother degenerates to that value.
        let x = vec![0.5];
        let y = vec![3.0];
        let x_new = vec![0.5];
        let result = nadaraya_watson(&x, &y, &x_new, 0.3, "gaussian").unwrap();
        assert_eq!(result.len(), 1);
        assert!(
            (result[0] - 3.0).abs() < 1e-6,
            "Single point smoother should return the value"
        );
    }

    #[test]
    fn test_duplicate_x_smoother() {
        // Repeated abscissae must not produce NaN/inf fitted values.
        let x = vec![0.0, 0.0, 0.5, 1.0, 1.0];
        let y = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let x_new = vec![0.0, 0.5, 1.0];
        let result = nadaraya_watson(&x, &y, &x_new, 0.3, "gaussian").unwrap();
        assert_eq!(result.len(), 3);
        for v in &result {
            assert!(v.is_finite());
        }
    }

    // --- CV / GCV scores and bandwidth optimization ---

    #[test]
    fn test_cv_smoother_linear_data() {
        let x = uniform_grid(30);
        let y: Vec<f64> = x.iter().map(|&xi| 2.0 * xi + 1.0).collect();
        let cv = cv_smoother(&x, &y, 0.2, "gaussian");
        assert!(cv.is_finite());
        assert!(cv >= 0.0);
        assert!(cv < 1.0, "CV error for smooth linear data should be small");
    }

    #[test]
    fn test_cv_smoother_invalid() {
        assert_eq!(cv_smoother(&[], &[], 0.1, "gaussian"), f64::INFINITY);
        assert_eq!(
            cv_smoother(&[0.0, 1.0], &[1.0, 2.0], -0.1, "gaussian"),
            f64::INFINITY
        );
    }

    #[test]
    fn test_gcv_smoother_linear_data() {
        let x = uniform_grid(30);
        let y: Vec<f64> = x.iter().map(|&xi| 2.0 * xi + 1.0).collect();
        let gcv = gcv_smoother(&x, &y, 0.2, "gaussian");
        assert!(gcv.is_finite());
        assert!(gcv >= 0.0);
    }

    #[test]
    fn test_gcv_smoother_invalid() {
        assert_eq!(gcv_smoother(&[], &[], 0.1, "gaussian"), f64::INFINITY);
    }

    #[test]
    fn test_optim_bandwidth_returns_valid() {
        let x = uniform_grid(30);
        let y: Vec<f64> = x
            .iter()
            .map(|&xi| (2.0 * std::f64::consts::PI * xi).sin())
            .collect();

        let result = optim_bandwidth(&x, &y, None, CvCriterion::Gcv, "gaussian", 20);
        assert!(result.h_opt > 0.0);
        assert!(result.value.is_finite());
        assert_eq!(result.criterion, CvCriterion::Gcv);
    }

    #[test]
    fn test_optim_bandwidth_cv_vs_gcv() {
        let x = uniform_grid(25);
        let y: Vec<f64> = x.iter().map(|&xi| xi * xi).collect();

        let cv_result = optim_bandwidth(&x, &y, None, CvCriterion::Cv, "gaussian", 20);
        let gcv_result = optim_bandwidth(&x, &y, None, CvCriterion::Gcv, "gaussian", 20);

        assert!(cv_result.h_opt > 0.0);
        assert!(gcv_result.h_opt > 0.0);
    }

    #[test]
    fn test_optim_bandwidth_custom_range() {
        let x = uniform_grid(20);
        let y: Vec<f64> = x.to_vec();
        let result = optim_bandwidth(&x, &y, Some((0.05, 0.5)), CvCriterion::Cv, "gaussian", 10);
        assert!(result.h_opt >= 0.05);
        assert!(result.h_opt <= 0.5);
    }

    // --- k-NN model selection ---

    #[test]
    fn test_knn_gcv_returns_valid() {
        let x = uniform_grid(20);
        let y: Vec<f64> = x.iter().map(|&xi| xi * xi).collect();

        let result = knn_gcv(&x, &y, 10);
        assert!(result.optimal_k >= 1);
        assert!(result.optimal_k <= 10);
        assert_eq!(result.cv_errors.len(), 10);
        for &e in &result.cv_errors {
            assert!(e.is_finite());
            assert!(e >= 0.0);
        }
    }

    #[test]
    fn test_knn_gcv_constant_data() {
        let x = uniform_grid(15);
        let y = vec![5.0; 15];
        let result = knn_gcv(&x, &y, 5);
        // Any k predicts a constant perfectly.
        for &e in &result.cv_errors {
            assert!(e < 0.01, "Constant data: CV error should be near zero");
        }
    }

    #[test]
    fn test_knn_lcv_returns_valid() {
        let x = uniform_grid(15);
        let y: Vec<f64> = x.to_vec();

        let result = knn_lcv(&x, &y, 5);
        assert_eq!(result.len(), 15);
        for &k in &result {
            assert!(k >= 1);
            assert!(k <= 5);
        }
    }

    #[test]
    fn test_knn_lcv_constant_data() {
        let x = uniform_grid(10);
        let y = vec![3.0; 10];
        let result = knn_lcv(&x, &y, 5);
        assert_eq!(result.len(), 10);
        // All k give zero error on constant data; any valid k is acceptable.
        for &k in &result {
            assert!(k >= 1);
        }
    }

    // --- Tricube kernel coverage ---

    #[test]
    fn test_tricube_kernel_values() {
        // Peak value at the origin.
        let k0 = tricube_kernel(0.0);
        assert!((k0 - 1.0).abs() < 1e-10, "tricube(0) should be 1.0");

        assert_eq!(tricube_kernel(1.0), 0.0, "tricube(1) should be 0");
        assert_eq!(tricube_kernel(-1.0), 0.0, "tricube(-1) should be 0");
        assert_eq!(tricube_kernel(2.0), 0.0, "tricube(2) should be 0");

        // Strictly between the extremes inside the support.
        let k05 = tricube_kernel(0.5);
        assert!(k05 > 0.0 && k05 < 1.0, "tricube(0.5) should be in (0, 1)");
    }

    #[test]
    fn test_nw_tricube_constant_data() {
        let x = uniform_grid(20);
        let y = vec![5.0; 20];

        let y_smooth = nadaraya_watson(&x, &y, &x, 0.2, "tricube").unwrap();

        for &yi in &y_smooth {
            assert!(
                (yi - 5.0).abs() < 0.1,
                "Tricube NW: constant data should remain constant"
            );
        }
    }

    #[test]
    fn test_nw_tricube_linear_data() {
        let x = uniform_grid(50);
        let y: Vec<f64> = x.iter().map(|&xi| 2.0 * xi + 1.0).collect();

        let y_smooth = nadaraya_watson(&x, &y, &x, 0.2, "tricube").unwrap();

        // Interior points only — boundary bias is expected at the edges.
        for i in 10..40 {
            let expected = 2.0 * x[i] + 1.0;
            assert!(
                (y_smooth[i] - expected).abs() < 0.3,
                "Tricube NW: linear trend should be approximately preserved at i={i}"
            );
        }
    }

    #[test]
    fn test_nw_tricube_vs_gaussian() {
        let x = uniform_grid(30);
        let y: Vec<f64> = x
            .iter()
            .map(|&xi| (2.0 * std::f64::consts::PI * xi).sin())
            .collect();

        let y_gauss = nadaraya_watson(&x, &y, &x, 0.15, "gaussian").unwrap();
        let y_tri = nadaraya_watson(&x, &y, &x, 0.15, "tricube").unwrap();

        assert_eq!(y_gauss.len(), y_tri.len());

        assert!(y_tri.iter().all(|v| v.is_finite()));

        let diff: f64 = y_gauss.iter().zip(&y_tri).map(|(a, b)| (a - b).abs()).sum();
        assert!(
            diff > 0.0,
            "Gaussian and tricube kernels should give different results"
        );
    }

    #[test]
    fn test_nw_tricube_vs_epanechnikov() {
        let x = uniform_grid(30);
        let y: Vec<f64> = x
            .iter()
            .map(|&xi| (2.0 * std::f64::consts::PI * xi).sin())
            .collect();

        let y_epan = nadaraya_watson(&x, &y, &x, 0.15, "epanechnikov").unwrap();
        let y_tri = nadaraya_watson(&x, &y, &x, 0.15, "tricube").unwrap();

        assert!(y_epan.iter().all(|v| v.is_finite()));
        assert!(y_tri.iter().all(|v| v.is_finite()));

        let diff: f64 = y_epan.iter().zip(&y_tri).map(|(a, b)| (a - b).abs()).sum();
        assert!(
            diff > 0.0,
            "Epanechnikov and tricube should give different results"
        );
    }

    #[test]
    fn test_ll_tricube_constant_data() {
        let x = uniform_grid(20);
        let y = vec![3.0; 20];

        let y_smooth = local_linear(&x, &y, &x, 0.2, "tricube").unwrap();

        for &yi in &y_smooth {
            assert!(
                (yi - 3.0).abs() < 0.1,
                "Tricube LL: constant should remain constant"
            );
        }
    }

    #[test]
    fn test_ll_tricube_linear_data() {
        let x = uniform_grid(30);
        let y: Vec<f64> = x.iter().map(|&xi| 3.0 * xi + 2.0).collect();

        let y_smooth = local_linear(&x, &y, &x, 0.2, "tricube").unwrap();

        // Interior points only.
        for i in 5..25 {
            let expected = 3.0 * x[i] + 2.0;
            assert!(
                (y_smooth[i] - expected).abs() < 0.2,
                "Tricube LL: should fit linear data well at i={i}"
            );
        }
    }

    #[test]
    fn test_ll_tricube_vs_gaussian() {
        let x = uniform_grid(30);
        let y: Vec<f64> = x.iter().map(|&xi| xi * xi).collect();

        let y_gauss = local_linear(&x, &y, &x, 0.15, "gaussian").unwrap();
        let y_tri = local_linear(&x, &y, &x, 0.15, "tricube").unwrap();

        assert_eq!(y_gauss.len(), y_tri.len());
        assert!(y_tri.iter().all(|v| v.is_finite()));

        let diff: f64 = y_gauss.iter().zip(&y_tri).map(|(a, b)| (a - b).abs()).sum();
        assert!(
            diff > 0.0,
            "Gaussian and tricube local linear should differ"
        );
    }

    #[test]
    fn test_lp_tricube_quadratic() {
        let x = uniform_grid(40);
        let y: Vec<f64> = x.iter().map(|&xi| xi * xi).collect();

        let y_smooth = local_polynomial(&x, &y, &x, 0.15, 2, "tricube").unwrap();

        // Interior points only.
        for i in 8..32 {
            let expected = x[i] * x[i];
            assert!(
                (y_smooth[i] - expected).abs() < 0.15,
                "Tricube LP: should fit quadratic data at i={i}"
            );
        }
    }

    #[test]
    fn test_get_kernel_tricube_aliases() {
        // "tricube" and "tri-cube" resolve to the same kernel.
        let k1 = get_kernel("tricube");
        let k2 = get_kernel("tri-cube");

        let test_val = 0.5;
        assert!(
            (k1(test_val) - k2(test_val)).abs() < 1e-15,
            "Both tricube aliases should give the same result"
        );
    }

    #[test]
    fn test_smoothing_matrix_tricube() {
        let x = uniform_grid(10);
        let s = smoothing_matrix_nw(&x, 0.2, "tricube").unwrap();

        assert_eq!(s.len(), 100);

        // Rows of the smoothing matrix remain stochastic for compact kernels.
        for i in 0..10 {
            let row_sum: f64 = (0..10).map(|j| s[i + j * 10]).sum();
            assert!(
                (row_sum - 1.0).abs() < 1e-10,
                "Tricube: row {} should sum to 1, got {}",
                i,
                row_sum
            );
        }
    }

    #[test]
    fn test_cv_smoother_tricube() {
        let x = uniform_grid(30);
        let y: Vec<f64> = x.iter().map(|&xi| 2.0 * xi + 1.0).collect();
        let cv = cv_smoother(&x, &y, 0.2, "tricube");
        assert!(cv.is_finite());
        assert!(cv >= 0.0);
    }

    #[test]
    fn test_optim_bandwidth_tricube() {
        let x = uniform_grid(25);
        let y: Vec<f64> = x
            .iter()
            .map(|&xi| (2.0 * std::f64::consts::PI * xi).sin())
            .collect();

        let result = optim_bandwidth(&x, &y, None, CvCriterion::Gcv, "tricube", 20);
        assert!(result.h_opt > 0.0);
        assert!(result.value.is_finite());
    }
}