kernel_density_estimation/kde/
univariate.rs

1use crate::bandwidth::Bandwidth;
2use crate::float::{float, KDEFloat};
3use crate::internal::{cumsum, Sealed};
4use crate::kde::KernelDensityEstimator;
5use crate::kernel::Kernel;
6
7pub trait UnivariateKDE<B, K, F: KDEFloat>: Sealed {
8    fn new<T: Into<Vec<F>>>(observations: T, bandwidth: B, kernel: K) -> Self;
9    fn pdf(&self, dataset: &[F]) -> Vec<F>;
10    fn cdf(&self, dataset: &[F]) -> Vec<F>;
11    fn sample(&self, dataset: &[F], n_samples: usize) -> Vec<F>;
12}
13
14impl<B, K, F: KDEFloat> UnivariateKDE<B, K, F> for KernelDensityEstimator<Vec<F>, B, K>
15where
16    B: Bandwidth<F>,
17    K: Kernel<F>,
18{
19    /// Returns an initialized `KernelDensityEstimator`.
20    fn new<T: Into<Vec<F>>>(observations: T, bandwidth: B, kernel: K) -> Self {
21        let observations: Vec<F> = observations.into();
22        KernelDensityEstimator {
23            observations,
24            bandwidth,
25            kernel,
26        }
27    }
28
29    /// Returns the estimated probability density function evaluated at the points in `dataset`.
30    fn pdf(&self, dataset: &[F]) -> Vec<F> {
31        let n = float!(self.observations.len());
32        let h = self.bandwidth.bandwidth(&self.observations);
33        let prefactor = F::one() / (n * h);
34        self.observations
35            .iter()
36            .fold(vec![F::zero(); dataset.len()], |mut acc, &x| {
37                dataset.iter().enumerate().for_each(|(i, &xi)| {
38                    let kernel_arg = (x - xi) / h;
39                    acc[i] += prefactor * self.kernel.pdf(kernel_arg);
40                });
41                acc
42            })
43    }
44
45    /// Returns the estimated cumulative distribution function evaluated at the points in `dataset`.
46    fn cdf(&self, dataset: &[F]) -> Vec<F> {
47        let pdf = self.pdf(dataset);
48        let sum = cumsum(&pdf);
49        let max = sum[sum.len() - 1];
50        sum.iter().map(|&x| x / max).collect()
51    }
52
53    /// Generates samples from the estimated probability density function.
54    fn sample(&self, dataset: &[F], n_samples: usize) -> Vec<F> {
55        let rng = fastrand::Rng::new();
56        let cdf = self.cdf(dataset);
57        (0..n_samples)
58            .map(|_| {
59                let rand = float!(rng.f64());
60                let mut lower_index = 0;
61                let mut upper_index = 0;
62                for (j, elem) in cdf.iter().enumerate() {
63                    if elem < &rand {
64                        lower_index = j;
65                    } else {
66                        upper_index = j;
67                        break;
68                    }
69                }
70                let xmin = dataset[lower_index];
71                let xmax = dataset[upper_index];
72                let xrange = xmax - xmin;
73                let ymin = cdf[lower_index];
74                let ymax = cdf[upper_index];
75                let yrange = ymax - ymin;
76                let yrange_rand = rand - ymin;
77                let yratio = yrange_rand / yrange;
78                (xrange * yratio) + xmin
79            })
80            .collect()
81    }
82}