use crate::error::{StatsError, StatsResult};
use scirs2_core::ndarray::{Array2, Ix2};
use scirs2_core::random::core::{seeded_rng, thread_rng, Random};

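/// Smoothing kernels for kernel density estimation. Each variant is a
/// symmetric probability density; all except `Gaussian` have compact
/// support on [-1, 1].
///
/// A minimal usage sketch (marked `ignore` because the doc-test crate path
/// is not assumed here):
///
/// ```ignore
/// let k = Kernel::Epanechnikov;
/// assert!((k.evaluate(0.0) - 0.75).abs() < 1e-12); // peak value is 3/4
/// assert_eq!(k.evaluate(2.0), 0.0); // zero outside the [-1, 1] support
/// ```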
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Kernel {
    /// Standard normal density; infinite support.
    Gaussian,
    /// Parabolic kernel on [-1, 1].
    Epanechnikov,
    /// Triangular kernel on [-1, 1].
    Triangular,
    /// Rectangular (boxcar) kernel on [-1, 1].
    Uniform,
    /// Quartic kernel on [-1, 1].
    Biweight,
    /// Triweight kernel on [-1, 1].
    Triweight,
    /// Cosine kernel on [-1, 1].
    Cosine,
}

impl Kernel {
    /// Evaluates the kernel function K(u) at the standardized point `u`.
    pub fn evaluate(&self, u: f64) -> f64 {
        match self {
            Kernel::Gaussian => {
                let inv_sqrt_2pi = 1.0 / (2.0 * std::f64::consts::PI).sqrt();
                inv_sqrt_2pi * (-0.5 * u * u).exp()
            }
            Kernel::Epanechnikov => {
                if u.abs() <= 1.0 {
                    0.75 * (1.0 - u * u)
                } else {
                    0.0
                }
            }
            Kernel::Triangular => {
                if u.abs() <= 1.0 {
                    1.0 - u.abs()
                } else {
                    0.0
                }
            }
            Kernel::Uniform => {
                if u.abs() <= 1.0 {
                    0.5
                } else {
                    0.0
                }
            }
            Kernel::Biweight => {
                if u.abs() <= 1.0 {
                    let t = 1.0 - u * u;
                    (15.0 / 16.0) * t * t
                } else {
                    0.0
                }
            }
            Kernel::Triweight => {
                if u.abs() <= 1.0 {
                    let t = 1.0 - u * u;
                    (35.0 / 32.0) * t * t * t
                } else {
                    0.0
                }
            }
            Kernel::Cosine => {
                if u.abs() <= 1.0 {
                    (std::f64::consts::PI / 4.0) * (std::f64::consts::FRAC_PI_2 * u).cos()
                } else {
                    0.0
                }
            }
        }
    }

    /// Radius of the kernel's support; evaluations are zero for
    /// |u| greater than this. Infinite for the Gaussian.
    pub fn support_radius(&self) -> f64 {
        match self {
            Kernel::Gaussian => f64::INFINITY,
            _ => 1.0,
        }
    }

    /// Kernel variance, the integral of u^2 K(u) du.
    fn variance(&self) -> f64 {
        match self {
            Kernel::Gaussian => 1.0,
            Kernel::Epanechnikov => 1.0 / 5.0,
            Kernel::Triangular => 1.0 / 6.0,
            Kernel::Uniform => 1.0 / 3.0,
            Kernel::Biweight => 1.0 / 7.0,
            Kernel::Triweight => 1.0 / 9.0,
            Kernel::Cosine => 1.0 - 8.0 / (std::f64::consts::PI * std::f64::consts::PI),
        }
    }

    /// Kernel roughness, R(K) = integral of K(u)^2 du.
    fn roughness(&self) -> f64 {
        match self {
            Kernel::Gaussian => 1.0 / (2.0 * std::f64::consts::PI).sqrt(),
            Kernel::Epanechnikov => 3.0 / 5.0,
            Kernel::Triangular => 2.0 / 3.0,
            Kernel::Uniform => 0.5,
            Kernel::Biweight => 5.0 / 7.0,
            Kernel::Triweight => 350.0 / 429.0,
            Kernel::Cosine => std::f64::consts::PI * std::f64::consts::PI / 16.0,
        }
    }

    /// CDF of the kernel itself, the integral of K(t) dt from -infinity to
    /// `u`; used to build the KDE's distribution function.
    fn cdf_kernel(&self, u: f64) -> f64 {
        match self {
            Kernel::Gaussian => 0.5 * (1.0 + erf_approx(u / std::f64::consts::SQRT_2)),
            Kernel::Epanechnikov => {
                if u < -1.0 {
                    0.0
                } else if u > 1.0 {
                    1.0
                } else {
                    0.5 + 0.75 * u - 0.25 * u * u * u
                }
            }
            Kernel::Triangular => {
                if u < -1.0 {
                    0.0
                } else if u < 0.0 {
                    0.5 * (1.0 + u) * (1.0 + u)
                } else if u <= 1.0 {
                    1.0 - 0.5 * (1.0 - u) * (1.0 - u)
                } else {
                    1.0
                }
            }
            Kernel::Uniform => {
                if u < -1.0 {
                    0.0
                } else if u > 1.0 {
                    1.0
                } else {
                    0.5 * (u + 1.0)
                }
            }
            Kernel::Biweight => {
                if u < -1.0 {
                    0.0
                } else if u > 1.0 {
                    1.0
                } else {
                    // Antiderivative of (1 - t^2)^2, evaluated from -1 to u.
                    let val_u = u - 2.0 * u.powi(3) / 3.0 + u.powi(5) / 5.0;
                    let val_neg1 = -1.0 + 2.0 / 3.0 - 1.0 / 5.0;
                    (15.0 / 16.0) * (val_u - val_neg1)
                }
            }
            Kernel::Triweight => {
                if u < -1.0 {
                    0.0
                } else if u > 1.0 {
                    1.0
                } else {
                    // Antiderivative of (1 - x^2)^3.
                    let anti =
                        |x: f64| -> f64 { x - x.powi(3) + 3.0 * x.powi(5) / 5.0 - x.powi(7) / 7.0 };
                    (35.0 / 32.0) * (anti(u) - anti(-1.0))
                }
            }
            Kernel::Cosine => {
                if u < -1.0 {
                    0.0
                } else if u > 1.0 {
                    1.0
                } else {
                    0.5 * ((std::f64::consts::FRAC_PI_2 * u).sin() + 1.0)
                }
            }
        }
    }
}

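/// Univariate kernel density estimator over a copied sample, with a
/// bandwidth `h`, a smoothing kernel, and optional non-negative weights.
/// The unweighted estimate is `f_hat(x) = (1 / (n h)) * sum_i K((x - x_i) / h)`.
///
/// A usage sketch (`ignore`d because the crate path is an assumption):
///
/// ```ignore
/// let data = vec![0.0, 1.0, 2.0, 3.0, 4.0];
/// let kde = KernelDensityEstimate::new(&data, Kernel::Gaussian);
/// assert!(kde.evaluate(2.0) > 0.0); // positive density near the data
/// assert!(kde.cdf(2.0) > 0.0 && kde.cdf(2.0) < 1.0);
/// ```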
pub struct KernelDensityEstimate {
    data: Vec<f64>,
    bandwidth: f64,
    kernel: Kernel,
    weights: Option<Vec<f64>>,
}

impl KernelDensityEstimate {
    /// Builds a KDE with the bandwidth chosen by Silverman's rule
    /// (falls back to 1.0 when fewer than two points are given).
    pub fn new(data: &[f64], kernel: Kernel) -> Self {
        let bw = if data.len() < 2 {
            1.0
        } else {
            silverman_bandwidth(data)
        };
        Self {
            data: data.to_vec(),
            bandwidth: bw,
            kernel,
            weights: None,
        }
    }

    /// Builds a KDE with an explicit bandwidth; non-positive values are
    /// silently replaced by 1.0.
    pub fn with_bandwidth(data: &[f64], bandwidth: f64, kernel: Kernel) -> Self {
        Self {
            data: data.to_vec(),
            bandwidth: if bandwidth > 0.0 { bandwidth } else { 1.0 },
            kernel,
            weights: None,
        }
    }

    /// Builds a weighted KDE. Errors if the lengths differ or any weight is
    /// negative; the bandwidth comes from Silverman's rule on the raw data.
    pub fn with_weights(data: &[f64], weights: &[f64], kernel: Kernel) -> StatsResult<Self> {
        if data.len() != weights.len() {
            return Err(StatsError::DimensionMismatch(format!(
                "data length ({}) != weights length ({})",
                data.len(),
                weights.len()
            )));
        }
        if weights.iter().any(|&w| w < 0.0) {
            return Err(StatsError::InvalidArgument(
                "Weights must be non-negative".to_string(),
            ));
        }
        let bw = if data.len() < 2 {
            1.0
        } else {
            silverman_bandwidth(data)
        };
        Ok(Self {
            data: data.to_vec(),
            bandwidth: bw,
            kernel,
            weights: Some(weights.to_vec()),
        })
    }

    /// Returns the bandwidth in use.
    pub fn bandwidth(&self) -> f64 {
        self.bandwidth
    }

    /// Evaluates the estimated density at `x`. Points outside the kernel's
    /// support are skipped, so compact kernels pay only for nearby data.
    pub fn evaluate(&self, x: f64) -> f64 {
        let n = self.data.len();
        if n == 0 {
            return 0.0;
        }
        let h = self.bandwidth;
        let radius = self.kernel.support_radius();

        match &self.weights {
            None => {
                let sum: f64 = self
                    .data
                    .iter()
                    .filter(|&&xi| radius.is_infinite() || ((x - xi) / h).abs() <= radius)
                    .map(|&xi| self.kernel.evaluate((x - xi) / h))
                    .sum();
                sum / (n as f64 * h)
            }
            Some(w) => {
                let w_sum: f64 = w.iter().sum();
                if w_sum <= 0.0 {
                    return 0.0;
                }
                let sum: f64 = self
                    .data
                    .iter()
                    .zip(w.iter())
                    .filter(|(&xi, _)| radius.is_infinite() || ((x - xi) / h).abs() <= radius)
                    .map(|(&xi, &wi)| wi * self.kernel.evaluate((x - xi) / h))
                    .sum();
                sum / (w_sum * h)
            }
        }
    }

    /// Evaluates the density at each point of `xs`.
    pub fn evaluate_batch(&self, xs: &[f64]) -> Vec<f64> {
        xs.iter().map(|&x| self.evaluate(x)).collect()
    }

    /// Total log-likelihood of `test_data` under the (unweighted) estimate.
    /// Returns negative infinity if any test point has zero density or if
    /// fewer than two training points are available.
    pub fn log_likelihood(&self, test_data: &[f64]) -> f64 {
        let n = self.data.len();
        if n < 2 {
            return f64::NEG_INFINITY;
        }
        let h = self.bandwidth;
        let radius = self.kernel.support_radius();

        test_data
            .iter()
            .map(|&x| {
                let sum: f64 = self
                    .data
                    .iter()
                    .filter(|&&xj| radius.is_infinite() || ((x - xj) / h).abs() <= radius)
                    .map(|&xj| self.kernel.evaluate((x - xj) / h))
                    .sum();
                let density = sum / (n as f64 * h);
                if density > 0.0 {
                    density.ln()
                } else {
                    f64::NEG_INFINITY
                }
            })
            .sum()
    }

    /// Leave-one-out log-likelihood of the training data: each point is
    /// scored against the density built from the remaining n - 1 points.
    pub fn loo_log_likelihood(&self) -> f64 {
        let n = self.data.len();
        if n < 2 {
            return f64::NEG_INFINITY;
        }
        let h = self.bandwidth;
        let radius = self.kernel.support_radius();

        self.data
            .iter()
            .enumerate()
            .map(|(i, &xi)| {
                let sum: f64 = self
                    .data
                    .iter()
                    .enumerate()
                    .filter(|&(j, &xj)| {
                        j != i && (radius.is_infinite() || ((xi - xj) / h).abs() <= radius)
                    })
                    .map(|(_, &xj)| self.kernel.evaluate((xi - xj) / h))
                    .sum();
                let density = sum / ((n - 1) as f64 * h);
                if density > 0.0 {
                    density.ln()
                } else {
                    f64::NEG_INFINITY
                }
            })
            .sum()
    }

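    /// Draws `n` samples by the smoothed bootstrap: pick a data point
    /// (weighted, if weights were supplied), then add kernel-distributed
    /// noise scaled by the bandwidth. A fixed `seed` makes the draws
    /// reproducible.
    ///
    /// Sketch (`ignore`d; crate path not assumed):
    ///
    /// ```ignore
    /// let kde = KernelDensityEstimate::new(&[0.0, 1.0, 2.0], Kernel::Gaussian);
    /// let a = kde.sample(100, Some(42));
    /// let b = kde.sample(100, Some(42));
    /// assert_eq!(a, b); // same seed, same samples
    /// ```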
    pub fn sample(&self, n: usize, seed: Option<u64>) -> Vec<f64> {
        if self.data.is_empty() || n == 0 {
            return Vec::new();
        }
        let mut rng = match seed {
            Some(s) => seeded_rng(s),
            None => {
                // No seed given: derive one from the thread-local RNG.
                let mut trng = thread_rng();
                let s: u64 = trng.gen_range(0..u64::MAX);
                seeded_rng(s)
            }
        };

        let h = self.bandwidth;
        let data_len = self.data.len();

        // Cumulative selection probabilities over the data points.
        let cum_weights = match &self.weights {
            Some(w) => {
                let total: f64 = w.iter().sum();
                if total <= 0.0 {
                    // Degenerate weights: fall back to uniform selection.
                    (0..data_len)
                        .map(|i| (i + 1) as f64 / data_len as f64)
                        .collect::<Vec<_>>()
                } else {
                    let mut cw = Vec::with_capacity(data_len);
                    let mut acc = 0.0;
                    for &wi in w {
                        acc += wi / total;
                        cw.push(acc);
                    }
                    cw
                }
            }
            None => (0..data_len)
                .map(|i| (i + 1) as f64 / data_len as f64)
                .collect::<Vec<_>>(),
        };

        let mut samples = Vec::with_capacity(n);
        for _ in 0..n {
            // Pick a data point by inverse transform over the cumulative weights.
            let u: f64 = rng.gen_range(0.0..1.0);
            let idx = match cum_weights
                .binary_search_by(|cw| cw.partial_cmp(&u).unwrap_or(std::cmp::Ordering::Equal))
            {
                Ok(i) => i.min(data_len - 1),
                Err(i) => i.min(data_len - 1),
            };

            // Perturb the chosen point with kernel noise scaled by the bandwidth.
            let noise = sample_from_kernel(&self.kernel, &mut rng) * h;
            samples.push(self.data[idx] + noise);
        }

        samples
    }

    /// Estimated cumulative distribution function at `x`, the average of the
    /// per-point kernel CDFs (weighted when weights are present).
    pub fn cdf(&self, x: f64) -> f64 {
        let n = self.data.len();
        if n == 0 {
            return 0.0;
        }
        let h = self.bandwidth;

        match &self.weights {
            None => {
                let sum: f64 = self
                    .data
                    .iter()
                    .map(|&xi| self.kernel.cdf_kernel((x - xi) / h))
                    .sum();
                sum / n as f64
            }
            Some(w) => {
                let w_sum: f64 = w.iter().sum();
                if w_sum <= 0.0 {
                    return 0.0;
                }
                let sum: f64 = self
                    .data
                    .iter()
                    .zip(w.iter())
                    .map(|(&xi, &wi)| wi * self.kernel.cdf_kernel((x - xi) / h))
                    .sum();
                sum / w_sum
            }
        }
    }

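    /// Inverts the estimated CDF by bisection to find the `p`-quantile.
    ///
    /// Sketch (`ignore`d; crate path not assumed):
    ///
    /// ```ignore
    /// let kde = KernelDensityEstimate::new(&[1.0, 2.0, 3.0, 4.0], Kernel::Gaussian);
    /// let median = kde.quantile(0.5)?; // near the middle of the data
    /// assert!(kde.quantile(1.5).is_err()); // p must lie in [0, 1]
    /// ```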
    pub fn quantile(&self, p: f64) -> StatsResult<f64> {
        if !(0.0..=1.0).contains(&p) {
            return Err(StatsError::InvalidArgument(format!(
                "Quantile probability must be in [0, 1], got {}",
                p
            )));
        }
        if self.data.is_empty() {
            return Err(StatsError::InvalidArgument(
                "Cannot compute quantile for empty data".to_string(),
            ));
        }

        // Bracket the quantile well outside the data range, then bisect the CDF.
        let min_val = self.data.iter().copied().fold(f64::INFINITY, f64::min);
        let max_val = self.data.iter().copied().fold(f64::NEG_INFINITY, f64::max);
        let spread = (max_val - min_val).max(1.0);
        let mut lo = min_val - 5.0 * self.bandwidth - spread;
        let mut hi = max_val + 5.0 * self.bandwidth + spread;

        for _ in 0..200 {
            let mid = 0.5 * (lo + hi);
            if self.cdf(mid) < p {
                lo = mid;
            } else {
                hi = mid;
            }
            if (hi - lo).abs() < 1e-12 {
                break;
            }
        }
        Ok(0.5 * (lo + hi))
    }
}

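/// Silverman's rule-of-thumb bandwidth,
/// `h = 0.9 * min(sigma, IQR / 1.34) * n^(-1/5)`,
/// falling back to 1.0 for degenerate inputs (fewer than two points, or
/// zero spread).
///
/// Sketch (`ignore`d; crate path not assumed):
///
/// ```ignore
/// let data: Vec<f64> = (0..100).map(|i| i as f64 / 10.0).collect();
/// let h = silverman_bandwidth(&data);
/// assert!(h > 0.0 && h.is_finite());
/// ```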
pub fn silverman_bandwidth(data: &[f64]) -> f64 {
    let n = data.len();
    if n < 2 {
        return 1.0;
    }
    let sigma = sample_std(data);
    let iqr = interquartile_range(data);
    let a = if iqr > 0.0 {
        sigma.min(iqr / 1.34)
    } else {
        sigma
    };
    if a <= 0.0 {
        return 1.0;
    }
    0.9 * a * (n as f64).powf(-0.2)
}

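/// Scott's normal-reference bandwidth, `h = 1.059 * sigma * n^(-1/5)`; a
/// common default for near-Gaussian data. Returns 1.0 for degenerate inputs.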
pub fn scott_bandwidth(data: &[f64]) -> f64 {
    let n = data.len();
    if n < 2 {
        return 1.0;
    }
    let sigma = sample_std(data);
    if sigma <= 0.0 {
        return 1.0;
    }
    1.059 * sigma * (n as f64).powf(-0.2)
}

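/// Two-stage plug-in bandwidth in the spirit of Sheather-Jones: pilot
/// bandwidths come from normal-reference values of the density functionals
/// psi_6 and psi_8, psi_4 is then estimated from the standardized data, and
/// the AMISE-optimal `h = (R(K) / (n * psi_4))^(1/5)` for the Gaussian
/// kernel is rescaled by sigma. Falls back to Silverman's rule whenever an
/// intermediate quantity degenerates.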
pub fn improved_sheather_jones(data: &[f64]) -> f64 {
    let n = data.len();
    if n < 2 {
        return 1.0;
    }

    let sigma = sample_std(data);
    if sigma <= 0.0 {
        return 1.0;
    }

    // Standardize the data so the pilot stages work on unit scale.
    let mean = data.iter().sum::<f64>() / n as f64;
    let z: Vec<f64> = data.iter().map(|&x| (x - mean) / sigma).collect();

    let n_f = n as f64;

    let sqrt_2pi = (2.0 * std::f64::consts::PI).sqrt();
    // Normal-reference values of the density functionals psi_6 and psi_8.
    let psi_6_normal = -15.0 / (16.0 * std::f64::consts::PI.sqrt());
    let psi_8_normal = 105.0 / (32.0 * std::f64::consts::PI.sqrt());

    // Stage 1: pilot bandwidths g2 and g1 from the normal-reference functionals.
    let g2 = if psi_8_normal.abs() > 1e-30 {
        ((-2.0 * psi_6_normal) / (n_f * psi_8_normal))
            .abs()
            .powf(1.0 / 9.0)
    } else {
        silverman_bandwidth(&z)
    };
    let g1 = if psi_6_normal.abs() > 1e-30 {
        (-6.0_f64.sqrt() / (n_f * psi_6_normal * estimate_psi_internal(&z, 4, g2)))
            .abs()
            .powf(1.0 / 7.0)
    } else {
        silverman_bandwidth(&z)
    };

    // Stage 2: plug-in estimate of psi_4 with pilot bandwidth g1.
    let psi_4_est = estimate_psi_internal(&z, 4, g1);

    // AMISE-optimal bandwidth for the Gaussian kernel, R(K) = 1 / (2 sqrt(2 pi)).
    let r_k = 1.0 / (2.0 * sqrt_2pi);
    let h_opt = if psi_4_est.abs() > 1e-30 {
        (r_k / (n_f * psi_4_est.abs())).powf(0.2)
    } else {
        silverman_bandwidth(&z)
    };

    // Undo the standardization; fall back to Silverman on degenerate output.
    let result = h_opt * sigma;
    if result.is_finite() && result > 0.0 {
        result
    } else {
        silverman_bandwidth(data)
    }
}

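/// Least-squares cross-validation bandwidth: scans 40 multiples of the
/// Silverman reference bandwidth in [0.1, 3.0] and keeps the one minimizing
/// the LSCV score `integral(f_hat^2) - (2/n) * sum_i f_hat_{-i}(x_i)`, where
/// `f_hat_{-i}` is the leave-one-out estimate. Costs O(n^2) per grid point.
///
/// Sketch (`ignore`d; crate path not assumed):
///
/// ```ignore
/// let data: Vec<f64> = (0..50).map(|i| (i as f64 * 0.7).sin()).collect();
/// let h = cross_validation_bandwidth(&data, &Kernel::Gaussian);
/// assert!(h > 0.0 && h.is_finite());
/// ```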
pub fn cross_validation_bandwidth(data: &[f64], kernel: &Kernel) -> f64 {
    let n = data.len();
    if n < 2 {
        return 1.0;
    }

    let h_ref = silverman_bandwidth(data);
    if h_ref <= 0.0 {
        return 1.0;
    }

    // Scan bandwidth multipliers in [0.1, 3.0] around the reference value.
    let n_grid = 40;
    let mut best_h = h_ref;
    let mut best_score = f64::INFINITY;

    for i in 0..n_grid {
        let ratio = 0.1 + 2.9 * (i as f64) / (n_grid as f64 - 1.0);
        let h = h_ref * ratio;
        if h <= 0.0 {
            continue;
        }

        // Mean leave-one-out density over the training points.
        let mut loo_sum = 0.0;
        let radius = kernel.support_radius();

        for i_pt in 0..n {
            let xi = data[i_pt];
            let mut density = 0.0;
            for (j_pt, &xj) in data.iter().enumerate() {
                if i_pt == j_pt {
                    continue;
                }
                let u = (xi - xj) / h;
                if radius.is_infinite() || u.abs() <= radius {
                    density += kernel.evaluate(u);
                }
            }
            density /= (n - 1) as f64 * h;
            loo_sum += density;
        }
        let loo_mean = loo_sum / n as f64;

        // Estimate the integral of f_hat^2 via the kernel self-convolution.
        // The identity (K * K)(t) = K(t / sqrt(2)) / sqrt(2) is exact for the
        // Gaussian kernel and is used here as an approximation for the others.
        let mut integral_f2 = 0.0;
        for i_pt in 0..n {
            for j_pt in 0..n {
                let u = (data[i_pt] - data[j_pt]) / h;
                if radius.is_infinite() || u.abs() <= 2.0 * radius {
                    integral_f2 +=
                        kernel.evaluate(u / std::f64::consts::SQRT_2) / std::f64::consts::SQRT_2;
                }
            }
        }
        integral_f2 /= (n as f64) * (n as f64) * h;

        // LSCV score: integral of f_hat^2 minus twice the LOO mean.
        let score = integral_f2 - 2.0 * loo_mean;

        if score < best_score {
            best_score = score;
            best_h = h;
        }
    }

    if best_h > 0.0 && best_h.is_finite() {
        best_h
    } else {
        h_ref
    }
}

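/// Bivariate kernel density estimator using a product kernel with a separate
/// bandwidth per axis:
/// `f_hat(x, y) = (1 / (n hx hy)) * sum_i K((x - x_i)/hx) * K((y - y_i)/hy)`.
///
/// Sketch (`ignore`d; crate path not assumed):
///
/// ```ignore
/// let x = vec![0.0, 1.0, 2.0];
/// let y = vec![0.0, 1.0, 2.0];
/// let kde = KDE2D::new(&x, &y, Kernel::Gaussian)?;
/// assert!(kde.evaluate(1.0, 1.0) > 0.0); // positive near the data
/// ```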
pub struct KDE2D {
    x_data: Vec<f64>,
    y_data: Vec<f64>,
    bandwidth_x: f64,
    bandwidth_y: f64,
    kernel: Kernel,
}

impl KDE2D {
    /// Builds a 2D KDE with per-axis Silverman bandwidths. Errors on
    /// mismatched lengths or empty data.
    pub fn new(x: &[f64], y: &[f64], kernel: Kernel) -> StatsResult<Self> {
        if x.len() != y.len() {
            return Err(StatsError::DimensionMismatch(format!(
                "x length ({}) != y length ({})",
                x.len(),
                y.len()
            )));
        }
        if x.is_empty() {
            return Err(StatsError::InvalidArgument(
                "Data arrays must not be empty".to_string(),
            ));
        }
        let bw_x = silverman_bandwidth(x);
        let bw_y = silverman_bandwidth(y);

        Ok(Self {
            x_data: x.to_vec(),
            y_data: y.to_vec(),
            bandwidth_x: bw_x,
            bandwidth_y: bw_y,
            kernel,
        })
    }

    /// Builds a 2D KDE with explicit per-axis bandwidths, which must be
    /// positive.
    pub fn with_bandwidths(
        x: &[f64],
        y: &[f64],
        bandwidth_x: f64,
        bandwidth_y: f64,
        kernel: Kernel,
    ) -> StatsResult<Self> {
        if x.len() != y.len() {
            return Err(StatsError::DimensionMismatch(format!(
                "x length ({}) != y length ({})",
                x.len(),
                y.len()
            )));
        }
        if x.is_empty() {
            return Err(StatsError::InvalidArgument(
                "Data arrays must not be empty".to_string(),
            ));
        }
        if bandwidth_x <= 0.0 || bandwidth_y <= 0.0 {
            return Err(StatsError::InvalidArgument(
                "Bandwidths must be positive".to_string(),
            ));
        }

        Ok(Self {
            x_data: x.to_vec(),
            y_data: y.to_vec(),
            bandwidth_x,
            bandwidth_y,
            kernel,
        })
    }

    /// Evaluates the product-kernel density at `(x, y)`.
    pub fn evaluate(&self, x: f64, y: f64) -> f64 {
        let n = self.x_data.len();
        if n == 0 {
            return 0.0;
        }
        let hx = self.bandwidth_x;
        let hy = self.bandwidth_y;
        let radius = self.kernel.support_radius();

        let sum: f64 = self
            .x_data
            .iter()
            .zip(self.y_data.iter())
            .filter(|(&xi, &yi)| {
                if radius.is_infinite() {
                    true
                } else {
                    ((x - xi) / hx).abs() <= radius && ((y - yi) / hy).abs() <= radius
                }
            })
            .map(|(&xi, &yi)| {
                let kx = self.kernel.evaluate((x - xi) / hx);
                let ky = self.kernel.evaluate((y - yi) / hy);
                kx * ky
            })
            .sum();

        sum / (n as f64 * hx * hy)
    }

    /// Evaluates the density on the Cartesian product of `x_grid` and
    /// `y_grid`, returning an `nx` by `ny` array indexed as `[i, j]`.
    pub fn evaluate_grid(&self, x_grid: &[f64], y_grid: &[f64]) -> Array2<f64> {
        let nx = x_grid.len();
        let ny = y_grid.len();
        let mut grid = Array2::zeros(Ix2(nx, ny));

        for (i, &xg) in x_grid.iter().enumerate() {
            for (j, &yg) in y_grid.iter().enumerate() {
                grid[[i, j]] = self.evaluate(xg, yg);
            }
        }

        grid
    }

    /// Returns the per-axis bandwidths `(h_x, h_y)`.
    pub fn bandwidths(&self) -> (f64, f64) {
        (self.bandwidth_x, self.bandwidth_y)
    }
}

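/// Abramowitz & Stegun formula 7.1.26 approximation to the error function;
/// the maximum absolute error is about 1.5e-7, which is ample for the
/// Gaussian kernel CDF used here.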
fn erf_approx(x: f64) -> f64 {
    let sign = if x >= 0.0 { 1.0 } else { -1.0 };
    let x = x.abs();
    let t = 1.0 / (1.0 + 0.3275911 * x);
    let poly = t
        * (0.254829592
            + t * (-0.284496736 + t * (1.421413741 + t * (-1.453152027 + t * 1.061405429))));
    sign * (1.0 - poly * (-x * x).exp())
}

/// Sample standard deviation with the n - 1 denominator; zero for fewer
/// than two points.
fn sample_std(data: &[f64]) -> f64 {
    let n = data.len();
    if n < 2 {
        return 0.0;
    }
    let mean = data.iter().sum::<f64>() / n as f64;
    let var = data.iter().map(|&x| (x - mean) * (x - mean)).sum::<f64>() / (n as f64 - 1.0);
    var.sqrt()
}

/// Interquartile range (Q3 - Q1) via linear interpolation; zero for fewer
/// than four points.
fn interquartile_range(data: &[f64]) -> f64 {
    if data.len() < 4 {
        return 0.0;
    }
    let mut sorted = data.to_vec();
    sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
    let q1 = percentile_sorted(&sorted, 25.0);
    let q3 = percentile_sorted(&sorted, 75.0);
    q3 - q1
}

/// Linearly interpolated percentile of an already-sorted slice.
fn percentile_sorted(sorted: &[f64], pct: f64) -> f64 {
    let n = sorted.len();
    if n == 0 {
        return 0.0;
    }
    let idx = (pct / 100.0) * (n as f64 - 1.0);
    let lo = idx.floor() as usize;
    let hi = idx.ceil() as usize;
    let frac = idx - lo as f64;
    if hi >= n {
        sorted[n - 1]
    } else {
        sorted[lo] * (1.0 - frac) + sorted[hi] * frac
    }
}

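/// Kernel estimate of the density functional psi_r (the integral of
/// f^(r)(x) f(x) dx), computed as
/// `(1 / (n^2 g^(r+1))) * sum_i sum_j phi^(r)((z_i - z_j) / g)` with a
/// Gaussian kernel and pilot bandwidth `g`; used by the plug-in selector
/// above.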
fn estimate_psi_internal(z: &[f64], r: usize, g: f64) -> f64 {
    let n = z.len();
    if n < 2 || g <= 0.0 {
        return 0.0;
    }

    let n_f = n as f64;
    let mut total = 0.0;

    for i in 0..n {
        for j in 0..n {
            let u = (z[i] - z[j]) / g;
            total += gaussian_derivative(u, r);
        }
    }

    let g_power = g.powi(r as i32 + 1);
    total / (n_f * n_f * g_power)
}

/// r-th derivative of the standard normal density,
/// phi^(r)(u) = (-1)^r * He_r(u) * phi(u), where He_r is the probabilists'
/// Hermite polynomial.
fn gaussian_derivative(u: f64, r: usize) -> f64 {
    let phi = (-0.5 * u * u).exp() / (2.0 * std::f64::consts::PI).sqrt();
    let sign = if r % 2 == 0 { 1.0 } else { -1.0 };
    sign * hermite_prob(u, r) * phi
}

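/// Probabilists' Hermite polynomial He_r(x), via the three-term recurrence
/// He_k(x) = x * He_{k-1}(x) - (k - 1) * He_{k-2}(x); for example,
/// He_2(x) = x^2 - 1 and He_3(x) = x^3 - 3x.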
fn hermite_prob(x: f64, r: usize) -> f64 {
    if r == 0 {
        return 1.0;
    }
    if r == 1 {
        return x;
    }
    let mut h_prev2 = 1.0;
    let mut h_prev1 = x;
    for k in 2..=r {
        let h_curr = x * h_prev1 - (k as f64 - 1.0) * h_prev2;
        h_prev2 = h_prev1;
        h_prev1 = h_curr;
    }
    h_prev1
}

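/// Draws one variate from the kernel viewed as a probability density:
/// Box-Muller for the Gaussian, inverse-CDF for the triangular, a direct
/// uniform draw for the boxcar, and rejection sampling under the kernel's
/// peak value for the remaining compact kernels.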
fn sample_from_kernel<R: scirs2_core::random::Rng>(kernel: &Kernel, rng: &mut Random<R>) -> f64 {
    match kernel {
        Kernel::Gaussian => {
            // Box-Muller transform; u1 is bounded away from zero so ln() is finite.
            let u1: f64 = rng.gen_range(1e-15..1.0);
            let u2: f64 = rng.gen_range(0.0..1.0);
            (-2.0 * u1.ln()).sqrt() * (2.0 * std::f64::consts::PI * u2).cos()
        }
        Kernel::Epanechnikov => {
            // Rejection sampling under the kernel's peak of 3/4.
            loop {
                let u: f64 = rng.gen_range(-1.0..1.0);
                let v: f64 = rng.gen_range(0.0..1.0);
                if v <= 0.75 * (1.0 - u * u) {
                    break u;
                }
            }
        }
        Kernel::Triangular => {
            // Inverse-CDF sampling.
            let u: f64 = rng.gen_range(0.0..1.0);
            if u < 0.5 {
                (2.0 * u).sqrt() - 1.0
            } else {
                1.0 - (2.0 * (1.0 - u)).sqrt()
            }
        }
        Kernel::Uniform => rng.gen_range(-1.0..1.0),
        Kernel::Biweight => {
            // Rejection sampling under the peak value 15/16.
            let k_max = 15.0 / 16.0;
            loop {
                let u: f64 = rng.gen_range(-1.0..1.0);
                let v: f64 = rng.gen_range(0.0..k_max);
                let t = 1.0 - u * u;
                if v <= (15.0 / 16.0) * t * t {
                    break u;
                }
            }
        }
        Kernel::Triweight => {
            // Rejection sampling under the peak value 35/32.
            let k_max = 35.0 / 32.0;
            loop {
                let u: f64 = rng.gen_range(-1.0..1.0);
                let v: f64 = rng.gen_range(0.0..k_max);
                let t = 1.0 - u * u;
                if v <= (35.0 / 32.0) * t * t * t {
                    break u;
                }
            }
        }
        Kernel::Cosine => {
            // Rejection sampling under the peak value pi/4.
            let k_max = std::f64::consts::PI / 4.0;
            loop {
                let u: f64 = rng.gen_range(-1.0..1.0);
                let v: f64 = rng.gen_range(0.0..k_max);
                if v <= (std::f64::consts::PI / 4.0) * (std::f64::consts::FRAC_PI_2 * u).cos() {
                    break u;
                }
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_gaussian_kernel_peak() {
        let k = Kernel::Gaussian;
        let val = k.evaluate(0.0);
        let expected = 1.0 / (2.0 * std::f64::consts::PI).sqrt();
        assert!((val - expected).abs() < 1e-12);
    }

    #[test]
    fn test_gaussian_kernel_symmetry() {
        let k = Kernel::Gaussian;
        let v1 = k.evaluate(1.5);
        let v2 = k.evaluate(-1.5);
        assert!((v1 - v2).abs() < 1e-15);
    }

    #[test]
    fn test_epanechnikov_kernel() {
        let k = Kernel::Epanechnikov;
        assert!((k.evaluate(0.0) - 0.75).abs() < 1e-12);
        assert!(k.evaluate(1.5).abs() < 1e-15);
        assert!(k.evaluate(-1.5).abs() < 1e-15);
    }

    #[test]
    fn test_triangular_kernel() {
        let k = Kernel::Triangular;
        assert!((k.evaluate(0.0) - 1.0).abs() < 1e-12);
        assert!((k.evaluate(0.5) - 0.5).abs() < 1e-12);
        assert!(k.evaluate(1.5).abs() < 1e-15);
    }

    #[test]
    fn test_uniform_kernel() {
        let k = Kernel::Uniform;
        assert!((k.evaluate(0.0) - 0.5).abs() < 1e-12);
        assert!((k.evaluate(0.9) - 0.5).abs() < 1e-12);
        assert!(k.evaluate(1.5).abs() < 1e-15);
    }

    #[test]
    fn test_biweight_kernel() {
        let k = Kernel::Biweight;
        assert!((k.evaluate(0.0) - 15.0 / 16.0).abs() < 1e-12);
        assert!(k.evaluate(1.5).abs() < 1e-15);
    }

    #[test]
    fn test_triweight_kernel() {
        let k = Kernel::Triweight;
        assert!((k.evaluate(0.0) - 35.0 / 32.0).abs() < 1e-12);
        assert!(k.evaluate(1.5).abs() < 1e-15);
    }

    #[test]
    fn test_cosine_kernel() {
        let k = Kernel::Cosine;
        assert!((k.evaluate(0.0) - std::f64::consts::PI / 4.0).abs() < 1e-12);
        assert!(k.evaluate(1.5).abs() < 1e-15);
    }

    #[test]
    fn test_support_radius() {
        assert!(Kernel::Gaussian.support_radius().is_infinite());
        assert!((Kernel::Epanechnikov.support_radius() - 1.0).abs() < 1e-15);
        assert!((Kernel::Biweight.support_radius() - 1.0).abs() < 1e-15);
    }

    #[test]
    fn test_kernel_integrates_to_one() {
        let kernels = [
            Kernel::Gaussian,
            Kernel::Epanechnikov,
            Kernel::Triangular,
            Kernel::Uniform,
            Kernel::Biweight,
            Kernel::Triweight,
            Kernel::Cosine,
        ];

        for kernel in &kernels {
            let n = 10000;
            let (lo, hi) = if kernel.support_radius().is_infinite() {
                (-10.0, 10.0)
            } else {
                (-1.0, 1.0)
            };
            let dx = (hi - lo) / n as f64;
            let integral: f64 = (0..n)
                .map(|i| {
                    let x = lo + (i as f64 + 0.5) * dx;
                    kernel.evaluate(x) * dx
                })
                .sum();
            assert!(
                (integral - 1.0).abs() < 0.01,
                "Kernel {:?} integrates to {} instead of 1.0",
                kernel,
                integral
            );
        }
    }

    #[test]
    fn test_silverman_bandwidth_normal_data() {
        let data: Vec<f64> = (0..1000).map(|i| (i as f64 - 500.0) / 100.0).collect();
        let bw = silverman_bandwidth(&data);
        assert!(bw > 0.0);
        assert!(bw < 5.0);
    }

    #[test]
    fn test_scott_bandwidth_normal_data() {
        let data: Vec<f64> = (0..1000).map(|i| (i as f64 - 500.0) / 100.0).collect();
        let bw = scott_bandwidth(&data);
        assert!(bw > 0.0);
        assert!(bw < 5.0);
    }

    #[test]
    fn test_isj_bandwidth() {
        let data: Vec<f64> = (0..500).map(|i| (i as f64 - 250.0) / 50.0).collect();
        let bw = improved_sheather_jones(&data);
        assert!(bw > 0.0, "ISJ bandwidth should be positive, got {}", bw);
        assert!(bw.is_finite());
    }

    #[test]
    fn test_cross_validation_bandwidth() {
        let data: Vec<f64> = (0..100).map(|i| (i as f64 - 50.0) / 10.0).collect();
        let bw = cross_validation_bandwidth(&data, &Kernel::Gaussian);
        assert!(bw > 0.0);
        assert!(bw.is_finite());
    }

    #[test]
    fn test_kde_evaluate_positive() {
        let data = vec![0.0, 1.0, 2.0, 3.0, 4.0];
        let kde = KernelDensityEstimate::new(&data, Kernel::Gaussian);
        let d = kde.evaluate(2.0);
        assert!(d > 0.0, "Density at data point should be positive");
    }

    #[test]
    fn test_kde_evaluate_batch() {
        let data = vec![0.0, 1.0, 2.0, 3.0, 4.0];
        let kde = KernelDensityEstimate::new(&data, Kernel::Gaussian);
        let xs = vec![0.0, 1.0, 2.0, 3.0, 4.0];
        let densities = kde.evaluate_batch(&xs);
        assert_eq!(densities.len(), 5);
        for &d in &densities {
            assert!(d > 0.0);
        }
    }

    #[test]
    fn test_kde_integrates_approx_one() {
        let data = vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0];
        let kde = KernelDensityEstimate::new(&data, Kernel::Gaussian);
        let n = 5000;
        let lo = -10.0;
        let hi = 15.0;
        let dx = (hi - lo) / n as f64;
        let integral: f64 = (0..n)
            .map(|i| {
                let x = lo + (i as f64 + 0.5) * dx;
                kde.evaluate(x) * dx
            })
            .sum();
        assert!(
            (integral - 1.0).abs() < 0.05,
            "KDE should integrate to ~1.0, got {}",
            integral
        );
    }

    #[test]
    fn test_kde_with_bandwidth() {
        let data = vec![0.0, 1.0, 2.0, 3.0, 4.0];
        let kde = KernelDensityEstimate::with_bandwidth(&data, 0.5, Kernel::Gaussian);
        assert!((kde.bandwidth() - 0.5).abs() < 1e-12);
        let d = kde.evaluate(2.0);
        assert!(d > 0.0);
    }

    #[test]
    fn test_kde_with_weights() {
        let data = vec![0.0, 1.0, 2.0, 3.0, 4.0];
        let weights = vec![1.0, 1.0, 3.0, 1.0, 1.0];
        let kde = KernelDensityEstimate::with_weights(&data, &weights, Kernel::Gaussian)
            .expect("Should create weighted KDE");
        let d_at_2 = kde.evaluate(2.0);
        let d_at_0 = kde.evaluate(0.0);
        assert!(
            d_at_2 > d_at_0,
            "Weighted KDE should peak near heavier data"
        );
    }

    #[test]
    fn test_kde_weight_dimension_mismatch() {
        let data = vec![0.0, 1.0, 2.0];
        let weights = vec![1.0, 1.0];
        let result = KernelDensityEstimate::with_weights(&data, &weights, Kernel::Gaussian);
        assert!(result.is_err());
    }

    #[test]
    fn test_kde_cdf_monotone() {
        let data = vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0];
        let kde = KernelDensityEstimate::new(&data, Kernel::Gaussian);
        let xs: Vec<f64> = (-50..=100).map(|i| i as f64 * 0.1).collect();
        let cdfs: Vec<f64> = xs.iter().map(|&x| kde.cdf(x)).collect();
        for i in 1..cdfs.len() {
            assert!(
                cdfs[i] >= cdfs[i - 1] - 1e-12,
                "CDF should be monotonically non-decreasing"
            );
        }
    }

    #[test]
    fn test_kde_cdf_boundary_values() {
        let data = vec![0.0, 1.0, 2.0, 3.0, 4.0];
        let kde = KernelDensityEstimate::new(&data, Kernel::Gaussian);
        assert!(kde.cdf(-100.0) < 0.01, "CDF at far left should be near 0");
        assert!(kde.cdf(100.0) > 0.99, "CDF at far right should be near 1");
    }

    #[test]
    fn test_kde_quantile() {
        let data: Vec<f64> = (0..100).map(|i| i as f64 / 10.0).collect();
        let kde = KernelDensityEstimate::new(&data, Kernel::Gaussian);
        let q50 = kde.quantile(0.5).expect("Should compute median quantile");
        assert!(
            (q50 - 4.95).abs() < 1.0,
            "Median quantile should be near 5, got {}",
            q50
        );
    }

    #[test]
    fn test_kde_quantile_invalid() {
        let data = vec![0.0, 1.0, 2.0];
        let kde = KernelDensityEstimate::new(&data, Kernel::Gaussian);
        assert!(kde.quantile(1.5).is_err());
        assert!(kde.quantile(-0.1).is_err());
    }

    #[test]
    fn test_kde_sample() {
        let data = vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0];
        let kde = KernelDensityEstimate::new(&data, Kernel::Gaussian);
        let samples = kde.sample(1000, Some(42));
        assert_eq!(samples.len(), 1000);
        let sample_mean: f64 = samples.iter().sum::<f64>() / samples.len() as f64;
        assert!(
            (sample_mean - 2.5).abs() < 1.0,
            "Sample mean should be near data mean, got {}",
            sample_mean
        );
    }

    #[test]
    fn test_kde_log_likelihood() {
        let data = vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0];
        let kde = KernelDensityEstimate::new(&data, Kernel::Gaussian);
        let ll = kde.log_likelihood(&[1.0, 2.0, 3.0]);
        assert!(ll.is_finite(), "Log-likelihood should be finite");
        assert!(
            ll < 0.0,
            "Log-likelihood of a density < 1 should be negative for these points"
        );
    }

    #[test]
    fn test_kde_loo_log_likelihood() {
        let data = vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0];
        let kde = KernelDensityEstimate::new(&data, Kernel::Gaussian);
        let loo = kde.loo_log_likelihood();
        assert!(loo.is_finite(), "LOO log-likelihood should be finite");
    }

    #[test]
    fn test_kde_all_kernels_evaluate() {
        let data = vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0];
        let kernels = [
            Kernel::Gaussian,
            Kernel::Epanechnikov,
            Kernel::Triangular,
            Kernel::Uniform,
            Kernel::Biweight,
            Kernel::Triweight,
            Kernel::Cosine,
        ];
        for kernel in &kernels {
            let kde = KernelDensityEstimate::with_bandwidth(&data, 1.0, *kernel);
            let d = kde.evaluate(2.5);
            assert!(
                d > 0.0,
                "Density with {:?} kernel should be positive at data center",
                kernel
            );
        }
    }

    #[test]
    fn test_kde2d_basic() {
        let x = vec![0.0, 1.0, 2.0, 3.0, 4.0];
        let y = vec![0.0, 1.0, 2.0, 3.0, 4.0];
        let kde = KDE2D::new(&x, &y, Kernel::Gaussian).expect("Should create 2D KDE");
        let d = kde.evaluate(2.0, 2.0);
        assert!(d > 0.0, "2D density at data centre should be positive");
    }

    #[test]
    fn test_kde2d_dimension_mismatch() {
        let x = vec![0.0, 1.0, 2.0];
        let y = vec![0.0, 1.0];
        let result = KDE2D::new(&x, &y, Kernel::Gaussian);
        assert!(result.is_err());
    }

    #[test]
    fn test_kde2d_grid() {
        let x = vec![0.0, 1.0, 2.0, 3.0, 4.0];
        let y = vec![0.0, 1.0, 2.0, 3.0, 4.0];
        let kde = KDE2D::new(&x, &y, Kernel::Gaussian).expect("Should create 2D KDE");
        let xg = vec![0.0, 2.0, 4.0];
        let yg = vec![0.0, 2.0, 4.0];
        let grid = kde.evaluate_grid(&xg, &yg);
        assert_eq!(grid.shape(), &[3, 3]);
        for &val in grid.iter() {
            assert!(val >= 0.0);
        }
    }

    #[test]
    fn test_kde2d_with_bandwidths() {
        let x = vec![0.0, 1.0, 2.0, 3.0];
        let y = vec![0.0, 1.0, 2.0, 3.0];
        let kde = KDE2D::with_bandwidths(&x, &y, 0.5, 0.5, Kernel::Epanechnikov)
            .expect("Should create 2D KDE");
        let (bx, by) = kde.bandwidths();
        assert!((bx - 0.5).abs() < 1e-12);
        assert!((by - 0.5).abs() < 1e-12);
    }

    #[test]
    fn test_kde2d_integrates_approx_one() {
        let x_data = vec![0.0, 1.0, 2.0, 3.0, 4.0];
        let y_data = vec![0.0, 1.0, 2.0, 3.0, 4.0];
        let kde = KDE2D::new(&x_data, &y_data, Kernel::Gaussian).expect("Should create 2D KDE");
        let n = 100;
        let lo = -5.0;
        let hi = 9.0;
        let dx = (hi - lo) / n as f64;
        let mut integral = 0.0;
        for i in 0..n {
            for j in 0..n {
                let x = lo + (i as f64 + 0.5) * dx;
                let y = lo + (j as f64 + 0.5) * dx;
                integral += kde.evaluate(x, y) * dx * dx;
            }
        }
        assert!(
            (integral - 1.0).abs() < 0.1,
            "2D KDE should integrate to ~1.0, got {}",
            integral
        );
    }

    #[test]
    fn test_kde_empty_data() {
        let kde = KernelDensityEstimate::new(&[], Kernel::Gaussian);
        assert!((kde.evaluate(0.0)).abs() < 1e-15);
    }

    #[test]
    fn test_kde_single_point() {
        let kde = KernelDensityEstimate::new(&[5.0], Kernel::Gaussian);
        let d = kde.evaluate(5.0);
        assert!(d > 0.0);
    }

    #[test]
    fn test_kde_constant_data() {
        let data = vec![3.0; 50];
        let kde = KernelDensityEstimate::new(&data, Kernel::Gaussian);
        let d = kde.evaluate(3.0);
        assert!(d > 0.0);
    }

    #[test]
    fn test_kde_sample_reproducible() {
        let data = vec![0.0, 1.0, 2.0, 3.0, 4.0];
        let kde = KernelDensityEstimate::new(&data, Kernel::Gaussian);
        let s1 = kde.sample(100, Some(12345));
        let s2 = kde.sample(100, Some(12345));
        assert_eq!(s1, s2, "Same seed should produce same samples");
    }

    #[test]
    fn test_kernel_variance_positive() {
        let kernels = [
            Kernel::Gaussian,
            Kernel::Epanechnikov,
            Kernel::Triangular,
            Kernel::Uniform,
            Kernel::Biweight,
            Kernel::Triweight,
            Kernel::Cosine,
        ];
        for k in &kernels {
            assert!(
                k.variance() > 0.0,
                "Kernel {:?} variance should be positive",
                k
            );
        }
    }

    #[test]
    fn test_kernel_roughness_positive() {
        let kernels = [
            Kernel::Gaussian,
            Kernel::Epanechnikov,
            Kernel::Triangular,
            Kernel::Uniform,
            Kernel::Biweight,
            Kernel::Triweight,
            Kernel::Cosine,
        ];
        for k in &kernels {
            assert!(
                k.roughness() > 0.0,
                "Kernel {:?} roughness should be positive",
                k
            );
        }
    }

    #[test]
    fn test_kernel_cdf_boundary() {
        let kernels = [
            Kernel::Gaussian,
            Kernel::Epanechnikov,
            Kernel::Triangular,
            Kernel::Uniform,
            Kernel::Biweight,
            Kernel::Triweight,
            Kernel::Cosine,
        ];
        for k in &kernels {
            let lo = if k.support_radius().is_infinite() {
                -20.0
            } else {
                -2.0
            };
            let hi = if k.support_radius().is_infinite() {
                20.0
            } else {
                2.0
            };
            assert!(
                k.cdf_kernel(lo) < 0.01,
                "Kernel {:?} CDF at far left should be near 0, got {}",
                k,
                k.cdf_kernel(lo)
            );
            assert!(
                k.cdf_kernel(hi) > 0.99,
                "Kernel {:?} CDF at far right should be near 1, got {}",
                k,
                k.cdf_kernel(hi)
            );
        }
    }

    #[test]
    fn test_hermite_polynomials() {
        assert!((hermite_prob(2.0, 0) - 1.0).abs() < 1e-12); // He_0(x) = 1
        assert!((hermite_prob(2.0, 1) - 2.0).abs() < 1e-12); // He_1(x) = x
        assert!((hermite_prob(2.0, 2) - 3.0).abs() < 1e-12); // He_2(x) = x^2 - 1
        assert!((hermite_prob(2.0, 3) - 2.0).abs() < 1e-12); // He_3(x) = x^3 - 3x
    }

    #[test]
    fn test_erf_approx_values() {
        assert!((erf_approx(0.0)).abs() < 1e-7);
        assert!((erf_approx(1.0) - 0.84270079).abs() < 1e-5);
        assert!((erf_approx(-1.0) + 0.84270079).abs() < 1e-5);
    }
}