use kiddo::SquaredEuclidean;
use ndarray::{Array1, Array2};
use std::num::NonZeroUsize;
use super::utils::unit_ball_volume;
use crate::estimators::approaches::common_nd::dataset::NdDataset;
use crate::estimators::traits::{CrossEntropy, GlobalValue, JointEntropy, LocalValues};
pub struct KozachenkoLeonenkoEntropy<const K: usize> {
pub nd: NdDataset<K>,
pub k: usize,
pub base: f64,
pub noise_level: f64,
}
impl<const K: usize> JointEntropy for KozachenkoLeonenkoEntropy<K> {
type Source = Array1<f64>;
type Params = (usize, f64);
fn joint_entropy(series: &[Self::Source], params: Self::Params) -> f64 {
assert_eq!(
series.len(),
K,
"Number of series must match dimensionality K"
);
if series.is_empty() {
return 0.0;
}
let n_samples = series[0].len();
let mut data = Array2::zeros((n_samples, K));
for (j, s) in series.iter().enumerate() {
for i in 0..n_samples {
data[[i, j]] = s[i];
}
}
let estimator = KozachenkoLeonenkoEntropy::<K>::new(data, params.0, params.1);
estimator.global_value()
}
}
impl<const K: usize> CrossEntropy for KozachenkoLeonenkoEntropy<K> {
fn cross_entropy(&self, other: &KozachenkoLeonenkoEntropy<K>) -> f64 {
use super::utils::calculate_common_entropy_components_at;
use statrs::function::gamma::digamma;
let (v_m, rho_k, _n_p, dimension) = calculate_common_entropy_components_at::<K>(
other.nd.view(),
self.k,
Some(self.nd.view()),
);
let n_q = other.nd.n as f64;
let ln_base = self.base.ln();
let mut sum_ln_rho = 0.0;
let mut count = 0usize;
for &r in &rho_k {
if r > 0.0 {
sum_ln_rho += r.ln();
count += 1;
}
}
if count == 0 {
return 0.0;
}
let hx = -digamma(self.k as f64)
+ digamma(n_q)
+ v_m.ln()
+ (dimension as f64) * sum_ln_rho / n_q;
hx / ln_base
}
}
impl<const K: usize> KozachenkoLeonenkoEntropy<K> {
pub fn new(data: Array2<f64>, k: usize, noise_level: f64) -> Self {
assert!(data.ncols() == K, "data.ncols() must equal K");
let data = super::utils::add_noise(data, noise_level);
let nd = NdDataset::<K>::from_array2(data);
assert!(nd.n == 0 || k < nd.n, "k must be <= N-1 for self-queries");
Self {
nd,
k,
base: std::f64::consts::E,
noise_level,
}
}
pub fn new_1d(data: Array1<f64>, k: usize, noise_level: f64) -> KozachenkoLeonenkoEntropy<1> {
let n = data.len();
let a2 = data.into_shape_with_order((n, 1)).expect("reshape 1d->2d");
KozachenkoLeonenkoEntropy::new(a2, k, noise_level)
}
pub fn from_points(points: Vec<[f64; K]>, k: usize, noise_level: f64) -> Self {
assert!(k >= 1);
let n = points.len();
let mut data = Array2::zeros((n, K));
for i in 0..n {
for j in 0..K {
data[[i, j]] = points[i][j];
}
}
Self::new(data, k, noise_level)
}
pub fn with_base(mut self, base: f64) -> Self {
self.base = base;
self
}
}
impl<const K: usize> GlobalValue for KozachenkoLeonenkoEntropy<K> {
fn global_value(&self) -> f64 {
use statrs::function::gamma::digamma;
if self.nd.n == 0 {
return 0.0;
}
let v_m = unit_ball_volume(K);
let mut sum_ln_r = 0.0f64;
let mut cnt = 0usize;
for p in self.nd.points.iter() {
let mut neigh = self
.nd
.tree
.nearest_n::<SquaredEuclidean>(p, NonZeroUsize::new(self.k + 1).unwrap());
let kth = neigh.remove(self.k);
let (dist2, _idx): (f64, u64) = kth.into();
let r = dist2.sqrt();
if r > 0.0 {
sum_ln_r += r.ln();
cnt += 1;
}
}
if cnt == 0 {
return 0.0;
}
let n_f = self.nd.n as f64;
let ln_base = self.base.ln();
let log_b = |x: f64| -> f64 { x.ln() / ln_base };
let term_digamma = (digamma(n_f) - digamma(self.k as f64)) / ln_base;
let term_volume = log_b(v_m);
let term_radii = (K as f64) * (sum_ln_r / (cnt as f64)) / ln_base; term_digamma + term_volume + term_radii
}
}
impl<const K: usize> LocalValues for KozachenkoLeonenkoEntropy<K> {
fn local_values(&self) -> Array1<f64> {
use statrs::function::gamma::digamma;
if self.nd.n == 0 {
return Array1::zeros(0);
}
let v_m = unit_ball_volume(K);
let n_f = self.nd.n as f64;
let ln_base = self.base.ln();
let log_b = |x: f64| -> f64 { x.ln() / ln_base };
let a_const = (digamma(n_f) - digamma(self.k as f64)) / ln_base + log_b(v_m);
let mut out = Array1::<f64>::zeros(self.nd.n);
for (i, p) in self.nd.points.iter().enumerate() {
let mut neigh = self
.nd
.tree
.nearest_n::<SquaredEuclidean>(p, NonZeroUsize::new(self.k + 1).unwrap());
let kth = neigh.remove(self.k);
let (dist2, _idx): (f64, u64) = kth.into();
let r = dist2.sqrt();
if r > 0.0 {
out[i] = a_const + (K as f64) * log_b(r);
} else {
out[i] = a_const;
}
}
out
}
}