use crate::bandwidth::Bandwidth;
use crate::float::{float, KDEFloat};
use crate::internal::{cumsum, Sealed};
use crate::kde::KernelDensityEstimator;
use crate::kernel::Kernel;
pub trait UnivariateKDE<B, K, F: KDEFloat>: Sealed {
fn new<T: Into<Vec<F>>>(observations: T, bandwidth: B, kernel: K) -> Self;
fn pdf(&self, dataset: &[F]) -> Vec<F>;
fn cdf(&self, dataset: &[F]) -> Vec<F>;
fn sample(&self, dataset: &[F], n_samples: usize) -> Vec<F>;
}
impl<B, K, F: KDEFloat> UnivariateKDE<B, K, F> for KernelDensityEstimator<Vec<F>, B, K>
where
B: Bandwidth<F>,
K: Kernel<F>,
{
fn new<T: Into<Vec<F>>>(observations: T, bandwidth: B, kernel: K) -> Self {
let observations: Vec<F> = observations.into();
KernelDensityEstimator {
observations,
bandwidth,
kernel,
}
}
fn pdf(&self, dataset: &[F]) -> Vec<F> {
let n = float!(self.observations.len());
let h = self.bandwidth.bandwidth(&self.observations);
let prefactor = F::one() / (n * h);
self.observations
.iter()
.fold(vec![F::zero(); dataset.len()], |mut acc, &x| {
dataset.iter().enumerate().for_each(|(i, &xi)| {
let kernel_arg = (x - xi) / h;
acc[i] += prefactor * self.kernel.pdf(kernel_arg);
});
acc
})
}
fn cdf(&self, dataset: &[F]) -> Vec<F> {
let pdf = self.pdf(dataset);
let sum = cumsum(&pdf);
let max = sum[sum.len() - 1];
sum.iter().map(|&x| x / max).collect()
}
fn sample(&self, dataset: &[F], n_samples: usize) -> Vec<F> {
let rng = fastrand::Rng::new();
let cdf = self.cdf(dataset);
(0..n_samples)
.map(|_| {
let rand = float!(rng.f64());
let mut lower_index = 0;
let mut upper_index = 0;
for (j, elem) in cdf.iter().enumerate() {
if elem < &rand {
lower_index = j;
} else {
upper_index = j;
break;
}
}
let xmin = dataset[lower_index];
let xmax = dataset[upper_index];
let xrange = xmax - xmin;
let ymin = cdf[lower_index];
let ymax = cdf[upper_index];
let yrange = ymax - ymin;
let yrange_rand = rand - ymin;
let yratio = yrange_rand / yrange;
(xrange * yratio) + xmin
})
.collect()
}
}