use anyhow::Result;
use ndarray::{s, Array1, Array2, ArrayView2, Dim};
use ndarray_linalg::UPLO::Lower;
use ndarray_linalg::{Cholesky, Inverse};
use ndarray_stats::CorrelationExt;
use std::f32::consts::PI;
use crate::training::node_estimation::multi_kde::gaussian_kernel_estimate::gaussian_kernel_estimate;
use crate::utils::FloatFunctions;
#[derive(Debug, Clone)]
pub(in crate::training::node_estimation::multi_kde) struct GaussianKDEBase<'a> {
data: ArrayView2<'a, f32>,
covariance: Option<Array2<f32>>,
inv_covariance: Option<Array2<f32>>,
weights: Option<Array2<f32>>,
log_det: Option<f32>,
}
impl<'a> GaussianKDEBase<'a> {
pub fn new(data: ArrayView2<'a, f32>) -> Self {
let mut kde = Self {
data,
covariance: None,
inv_covariance: None,
weights: None,
log_det: None,
};
kde.compute_covariance();
kde
}
#[allow(dead_code)]
pub fn evaluate(&self, points: Array2<f32>) -> Result<Array1<f32>> {
let result = gaussian_kernel_estimate(
self.data.view(),
self.weights
.as_ref()
.expect("Before evaluating, the weights must be calculated!")
.view(),
points,
self.inv_covariance
.as_ref()
.expect("Before evaluating, the inverse of the covariance must be calculated!")
.view(),
)?;
Ok(result.slice(s![.., 0]).into_owned())
}
#[allow(dead_code)]
fn compute_covariance(&mut self) {
self.calculate_weights();
let factor = self.scotts_factor();
let covariance = self.data.t().cov(1.).unwrap();
let covariance_inv = covariance.inv().unwrap(); self.covariance = Some(covariance * factor.powi(2));
self.inv_covariance = Some(covariance_inv / factor.powi(2));
let l: Array2<f32> = (self.covariance.as_ref().unwrap() * 2.0 * PI)
.cholesky(Lower)
.unwrap();
self.log_det = Some(2.0 * l.diag().into_owned().ln().sum());
}
#[allow(dead_code)]
fn scotts_factor(&self) -> f32 {
let d = self.data.shape()[1];
let exponent = -1.0 / ((d + 4) as f32);
self.neff().powf(exponent)
}
#[allow(dead_code)]
fn neff(&self) -> f32 {
let weights = self.weights.as_ref().unwrap();
1.0 / weights.clone().powi(2).sum()
}
#[allow(dead_code)]
fn calculate_weights(&mut self) {
let n = self.data.shape()[0];
self.weights = Some(Array2::ones(Dim([n, 1])) / (n as f32));
}
}
#[cfg(test)]
mod tests {
use crate::training::node_estimation::multi_kde::gaussian_kde::GaussianKDEBase;
use ndarray::{arr1, arr2, Array1, Array2};
use ndarray_linalg::assert_close_l1;
fn setup() -> (Array2<f32>, Array2<f32>, Array1<f32>) {
let data = arr2(&[[2., 2.1, 2.2, 8., 8.1, 8.2]]).t().into_owned();
let grid = arr2(&[[0., 1., 2., 3., 4., 5., 6., 7., 8., 9.]])
.t()
.into_owned();
let expected: Array1<f32> = arr1(&[
0.0573441, 0.07811948, 0.08925389, 0.08777485, 0.07935172, 0.07411137, 0.07774745,
0.0863332, 0.0899083, 0.08132743,
]);
(data, grid, expected)
}
#[test]
fn correct_fit() {
let (data, _, _) = setup();
let covariance = arr2(&[[5.27818777]]);
let inv_covariance = arr2(&[[0.18945897]]);
let log_det = 3.5014598793760214;
let gkde = GaussianKDEBase::new(data.view());
assert_eq!(gkde.covariance.unwrap(), covariance);
assert_eq!(gkde.inv_covariance.unwrap(), inv_covariance);
assert_eq!(gkde.log_det.unwrap(), log_det);
}
#[test]
fn correct_evaluate() {
let (data, grid, expected) = setup();
let gkde = GaussianKDEBase::new(data.view());
let result = gkde.evaluate(grid).unwrap();
assert_close_l1!(&result, &expected, 0.0000001);
}
}