use crate::estimators::traits::{
ConditionalMutualInformationEstimator, ConditionalTransferEntropyEstimator,
MutualInformationEstimator, TransferEntropyEstimator,
};
use crate::estimators::traits::{
CrossEntropy, GlobalValue, JointEntropy, LocalValues, OptionalLocalValues,
};
use crate::estimators::utils::te_slicing::{cte_observations_const, te_observations_const};
use kiddo::{ImmutableKdTree, SquaredEuclidean};
use ndarray::{Array1, Array2, Axis, concatenate};
use ndarray_linalg::{Cholesky, Inverse, UPLO};
use ndarray_stats::CorrelationExt;
/// Kernel-density (KDE) estimator of transfer entropy from `source` to `dest`.
///
/// The const parameters encode the history lengths / step size and the derived
/// dimensionalities of each joint space used by the estimator; they must be
/// mutually consistent with what `te_observations_const` expects.
pub struct KernelTransferEntropy<
    const SRC_HIST: usize,
    const DEST_HIST: usize,
    const STEP_SIZE: usize,
    const D_SOURCE: usize,
    const D_TARGET: usize,
    const D_JOINT: usize,
    const D_XP_YP: usize,
    const D_YP: usize,
    const D_YF_YP: usize,
> {
    // Kernel name; compared case-insensitively ("gaussian" vs. box) downstream.
    pub kernel_type: String,
    // Kernel bandwidth; must be > 0 (asserted by `KernelEntropy`).
    pub bandwidth: f64,
    // Source time series (owned copy of the caller's data).
    pub source: Array2<f64>,
    // Destination time series (owned copy of the caller's data).
    pub dest: Array2<f64>,
    // When true, GPU code paths are bypassed even if the "gpu" feature is on.
    pub force_cpu: bool,
}
impl<
    const SRC_HIST: usize,
    const DEST_HIST: usize,
    const STEP_SIZE: usize,
    const D_SOURCE: usize,
    const D_TARGET: usize,
    const D_JOINT: usize,
    const D_XP_YP: usize,
    const D_YP: usize,
    const D_YF_YP: usize,
>
    KernelTransferEntropy<
        SRC_HIST,
        DEST_HIST,
        STEP_SIZE,
        D_SOURCE,
        D_TARGET,
        D_JOINT,
        D_XP_YP,
        D_YP,
        D_YF_YP,
    >
{
    /// Creates a new estimator, taking owned copies of both series.
    ///
    /// The `_src_hist_len`/`_dest_hist_len`/`_step_size` arguments are unused at
    /// runtime; the const generics carry that information instead.
    pub fn new(
        source: &Array2<f64>,
        dest: &Array2<f64>,
        _src_hist_len: usize,
        _dest_hist_len: usize,
        _step_size: usize,
        kernel_type: String,
        bandwidth: f64,
    ) -> Self {
        Self {
            source: source.to_owned(),
            dest: dest.to_owned(),
            kernel_type,
            bandwidth,
            force_cpu: false,
        }
    }

    /// Forces CPU code paths (relevant only when the "gpu" feature is enabled).
    pub fn set_force_cpu(&mut self, force_cpu: bool) {
        self.force_cpu = force_cpu;
    }

    /// Evaluates the KDE density of `data` at each of its own rows, using this
    /// estimator's kernel settings.
    fn compute_density<const D: usize>(&self, data: Array2<f64>) -> Array1<f64> {
        let mut estimator = KernelEntropy::<D>::new_with_kernel_type(
            data,
            self.kernel_type.clone(),
            self.bandwidth,
        );
        estimator.set_force_cpu(self.force_cpu);
        estimator.kde_probability_density()
    }
}
impl<
    const SRC_HIST: usize,
    const DEST_HIST: usize,
    const STEP_SIZE: usize,
    const D_SOURCE: usize,
    const D_TARGET: usize,
    const D_JOINT: usize,
    const D_XP_YP: usize,
    const D_YP: usize,
    const D_YF_YP: usize,
> GlobalValue
    for KernelTransferEntropy<
        SRC_HIST,
        DEST_HIST,
        STEP_SIZE,
        D_SOURCE,
        D_TARGET,
        D_JOINT,
        D_XP_YP,
        D_YP,
        D_YF_YP,
    >
{
    /// Global TE is the average of the local TE values (0.0 on empty input).
    fn global_value(&self) -> f64 {
        let locals = self.local_values();
        locals.mean().unwrap_or(0.0)
    }
}
impl<
    const SRC_HIST: usize,
    const DEST_HIST: usize,
    const STEP_SIZE: usize,
    const D_SOURCE: usize,
    const D_TARGET: usize,
    const D_JOINT: usize,
    const D_XP_YP: usize,
    const D_YP: usize,
    const D_YF_YP: usize,
> OptionalLocalValues
    for KernelTransferEntropy<
        SRC_HIST,
        DEST_HIST,
        STEP_SIZE,
        D_SOURCE,
        D_TARGET,
        D_JOINT,
        D_XP_YP,
        D_YP,
        D_YF_YP,
    >
{
    /// Kernel TE always supports per-sample local values.
    fn supports_local(&self) -> bool {
        true
    }
    /// Infallible here: delegates straight to `LocalValues::local_values`.
    fn local_values_opt(&self) -> Result<Array1<f64>, &'static str> {
        Ok(self.local_values())
    }
}
// Marker-style impl: `TransferEntropyEstimator` appears to require nothing
// beyond the `GlobalValue`/`LocalValues` impls above (trait defined in the
// traits module — any methods come from defaults).
impl<
    const SRC_HIST: usize,
    const DEST_HIST: usize,
    const STEP_SIZE: usize,
    const D_SOURCE: usize,
    const D_TARGET: usize,
    const D_JOINT: usize,
    const D_XP_YP: usize,
    const D_YP: usize,
    const D_YF_YP: usize,
> TransferEntropyEstimator
    for KernelTransferEntropy<
        SRC_HIST,
        DEST_HIST,
        STEP_SIZE,
        D_SOURCE,
        D_TARGET,
        D_JOINT,
        D_XP_YP,
        D_YP,
        D_YF_YP,
    >
{
}
impl<
    const SRC_HIST: usize,
    const DEST_HIST: usize,
    const STEP_SIZE: usize,
    const D_SOURCE: usize,
    const D_TARGET: usize,
    const D_JOINT: usize,
    const D_XP_YP: usize,
    const D_YP: usize,
    const D_YF_YP: usize,
> LocalValues
    for KernelTransferEntropy<
        SRC_HIST,
        DEST_HIST,
        STEP_SIZE,
        D_SOURCE,
        D_TARGET,
        D_JOINT,
        D_XP_YP,
        D_YP,
        D_YF_YP,
    >
{
    /// Local transfer entropy per sample:
    /// ln( p(yf,xp,yp) * p(yp) / (p(xp,yp) * p(yf,yp)) ), or 0.0 wherever any
    /// of the estimated densities vanishes.
    fn local_values(&self) -> Array1<f64> {
        // Slice the series into (future target, target history, source history).
        let (yf, yp, xp) = te_observations_const::<
            f64,
            SRC_HIST,
            DEST_HIST,
            STEP_SIZE,
            D_SOURCE,
            D_TARGET,
            D_JOINT,
            D_XP_YP,
            D_YP,
            D_YF_YP,
        >(&self.source, &self.dest, false);
        // Column-stack the joint spaces whose densities enter the TE ratio.
        let joint_all = concatenate(Axis(1), &[yf.view(), xp.view(), yp.view()]).unwrap();
        let xp_yp = concatenate(Axis(1), &[xp.view(), yp.view()]).unwrap();
        let yf_yp = concatenate(Axis(1), &[yf.view(), yp.view()]).unwrap();
        let p_joint_all = self.compute_density::<D_JOINT>(joint_all);
        let p_yp = self.compute_density::<D_YP>(yp);
        let p_xp_yp = self.compute_density::<D_XP_YP>(xp_yp);
        let p_yf_yp = self.compute_density::<D_YF_YP>(yf_yp);
        let locals: Vec<f64> = (0..p_joint_all.len())
            .map(|i| {
                let numerator = p_joint_all[i] * p_yp[i];
                let denominator = p_xp_yp[i] * p_yf_yp[i];
                if numerator > 0.0 && denominator > 0.0 {
                    (numerator / denominator).ln()
                } else {
                    0.0
                }
            })
            .collect();
        Array1::from(locals)
    }
}
/// Kernel-density estimator of transfer entropy from `source` to `dest`
/// conditioned on `cond`.
///
/// Const parameters mirror `KernelTransferEntropy` with the extra conditional
/// history length and the dimensionalities of the Z-augmented joint spaces.
pub struct KernelConditionalTransferEntropy<
    const SRC_HIST: usize,
    const DEST_HIST: usize,
    const COND_HIST: usize,
    const STEP_SIZE: usize,
    const D_SOURCE: usize,
    const D_TARGET: usize,
    const D_COND: usize,
    const D_JOINT: usize,
    const D_XP_YP_ZP: usize,
    const D_YP_ZP: usize,
    const D_YF_YP_ZP: usize,
> {
    // Kernel name; compared case-insensitively downstream.
    pub kernel_type: String,
    // Kernel bandwidth; must be > 0.
    pub bandwidth: f64,
    // Source time series.
    pub source: Array2<f64>,
    // Destination time series.
    pub dest: Array2<f64>,
    // Conditioning time series.
    pub cond: Array2<f64>,
    // When true, GPU code paths are bypassed even if available.
    pub force_cpu: bool,
}
impl<
    const SRC_HIST: usize,
    const DEST_HIST: usize,
    const COND_HIST: usize,
    const STEP_SIZE: usize,
    const D_SOURCE: usize,
    const D_TARGET: usize,
    const D_COND: usize,
    const D_JOINT: usize,
    const D_XP_YP_ZP: usize,
    const D_YP_ZP: usize,
    const D_YF_YP_ZP: usize,
>
    KernelConditionalTransferEntropy<
        SRC_HIST,
        DEST_HIST,
        COND_HIST,
        STEP_SIZE,
        D_SOURCE,
        D_TARGET,
        D_COND,
        D_JOINT,
        D_XP_YP_ZP,
        D_YP_ZP,
        D_YF_YP_ZP,
    >
{
    /// Creates a new estimator, taking owned copies of all three series.
    ///
    /// The `_*_hist_len`/`_step_size` arguments are unused at runtime; the
    /// const generics carry that information instead.
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        source: &Array2<f64>,
        dest: &Array2<f64>,
        cond: &Array2<f64>,
        _src_hist_len: usize,
        _dest_hist_len: usize,
        _cond_hist_len: usize,
        _step_size: usize,
        kernel_type: String,
        bandwidth: f64,
    ) -> Self {
        Self {
            source: source.to_owned(),
            dest: dest.to_owned(),
            cond: cond.to_owned(),
            kernel_type,
            bandwidth,
            force_cpu: false,
        }
    }

    /// Forces CPU code paths (relevant only when the "gpu" feature is enabled).
    pub fn set_force_cpu(&mut self, force_cpu: bool) {
        self.force_cpu = force_cpu;
    }

    /// Evaluates the KDE density of `data` at each of its own rows, using this
    /// estimator's kernel settings.
    fn compute_density<const D: usize>(&self, data: Array2<f64>) -> Array1<f64> {
        let mut estimator = KernelEntropy::<D>::new_with_kernel_type(
            data,
            self.kernel_type.clone(),
            self.bandwidth,
        );
        estimator.set_force_cpu(self.force_cpu);
        estimator.kde_probability_density()
    }
}
impl<
    const SRC_HIST: usize,
    const DEST_HIST: usize,
    const COND_HIST: usize,
    const STEP_SIZE: usize,
    const D_SOURCE: usize,
    const D_TARGET: usize,
    const D_COND: usize,
    const D_JOINT: usize,
    const D_XP_YP_ZP: usize,
    const D_YP_ZP: usize,
    const D_YF_YP_ZP: usize,
> GlobalValue
    for KernelConditionalTransferEntropy<
        SRC_HIST,
        DEST_HIST,
        COND_HIST,
        STEP_SIZE,
        D_SOURCE,
        D_TARGET,
        D_COND,
        D_JOINT,
        D_XP_YP_ZP,
        D_YP_ZP,
        D_YF_YP_ZP,
    >
{
    /// Global conditional TE is the average of the local values (0.0 if empty).
    fn global_value(&self) -> f64 {
        let locals = self.local_values();
        locals.mean().unwrap_or(0.0)
    }
}
impl<
    const SRC_HIST: usize,
    const DEST_HIST: usize,
    const COND_HIST: usize,
    const STEP_SIZE: usize,
    const D_SOURCE: usize,
    const D_TARGET: usize,
    const D_COND: usize,
    const D_JOINT: usize,
    const D_XP_YP_ZP: usize,
    const D_YP_ZP: usize,
    const D_YF_YP_ZP: usize,
> OptionalLocalValues
    for KernelConditionalTransferEntropy<
        SRC_HIST,
        DEST_HIST,
        COND_HIST,
        STEP_SIZE,
        D_SOURCE,
        D_TARGET,
        D_COND,
        D_JOINT,
        D_XP_YP_ZP,
        D_YP_ZP,
        D_YF_YP_ZP,
    >
{
    /// Kernel conditional TE always supports per-sample local values.
    fn supports_local(&self) -> bool {
        true
    }
    /// Infallible here: delegates straight to `LocalValues::local_values`.
    fn local_values_opt(&self) -> Result<Array1<f64>, &'static str> {
        Ok(self.local_values())
    }
}
// Marker-style impl: `ConditionalTransferEntropyEstimator` appears to require
// nothing beyond the `GlobalValue`/`LocalValues` impls above (any methods come
// from the trait's defaults).
impl<
    const SRC_HIST: usize,
    const DEST_HIST: usize,
    const COND_HIST: usize,
    const STEP_SIZE: usize,
    const D_SOURCE: usize,
    const D_TARGET: usize,
    const D_COND: usize,
    const D_JOINT: usize,
    const D_XP_YP_ZP: usize,
    const D_YP_ZP: usize,
    const D_YF_YP_ZP: usize,
> ConditionalTransferEntropyEstimator
    for KernelConditionalTransferEntropy<
        SRC_HIST,
        DEST_HIST,
        COND_HIST,
        STEP_SIZE,
        D_SOURCE,
        D_TARGET,
        D_COND,
        D_JOINT,
        D_XP_YP_ZP,
        D_YP_ZP,
        D_YF_YP_ZP,
    >
{
}
impl<
    const SRC_HIST: usize,
    const DEST_HIST: usize,
    const COND_HIST: usize,
    const STEP_SIZE: usize,
    const D_SOURCE: usize,
    const D_TARGET: usize,
    const D_COND: usize,
    const D_JOINT: usize,
    const D_XP_YP_ZP: usize,
    const D_YP_ZP: usize,
    const D_YF_YP_ZP: usize,
> LocalValues
    for KernelConditionalTransferEntropy<
        SRC_HIST,
        DEST_HIST,
        COND_HIST,
        STEP_SIZE,
        D_SOURCE,
        D_TARGET,
        D_COND,
        D_JOINT,
        D_XP_YP_ZP,
        D_YP_ZP,
        D_YF_YP_ZP,
    >
{
    /// Local conditional transfer entropy per sample:
    /// ln( p(yf,xp,yp,zp) * p(yp,zp) / (p(xp,yp,zp) * p(yf,yp,zp)) ),
    /// or 0.0 wherever any estimated density vanishes.
    fn local_values(&self) -> Array1<f64> {
        // Slice into (future target, target history, source history, cond history).
        let (yf, yp, xp, zp) = cte_observations_const::<
            f64,
            SRC_HIST,
            DEST_HIST,
            COND_HIST,
            STEP_SIZE,
            D_SOURCE,
            D_TARGET,
            D_COND,
            D_JOINT,
            D_XP_YP_ZP,
            D_YP_ZP,
            D_YF_YP_ZP,
        >(&self.source, &self.dest, &self.cond, false);
        // Column-stack the joint spaces whose densities enter the CTE ratio.
        let joint_all =
            concatenate(Axis(1), &[yf.view(), xp.view(), yp.view(), zp.view()]).unwrap();
        let xp_yp_zp = concatenate(Axis(1), &[xp.view(), yp.view(), zp.view()]).unwrap();
        let yp_zp = concatenate(Axis(1), &[yp.view(), zp.view()]).unwrap();
        let yf_yp_zp = concatenate(Axis(1), &[yf.view(), yp.view(), zp.view()]).unwrap();
        let p_joint_all = self.compute_density::<D_JOINT>(joint_all);
        let p_xp_yp_zp = self.compute_density::<D_XP_YP_ZP>(xp_yp_zp);
        let p_yp_zp = self.compute_density::<D_YP_ZP>(yp_zp);
        let p_yf_yp_zp = self.compute_density::<D_YF_YP_ZP>(yf_yp_zp);
        let locals: Vec<f64> = (0..p_joint_all.len())
            .map(|i| {
                let numerator = p_joint_all[i] * p_yp_zp[i];
                let denominator = p_xp_yp_zp[i] * p_yf_yp_zp[i];
                if numerator > 0.0 && denominator > 0.0 {
                    (numerator / denominator).ln()
                } else {
                    0.0
                }
            })
            .collect();
        Array1::from(locals)
    }
}
// Generates a kernel-based multivariate mutual information estimator type.
//
// $name     — the generated struct's identifier.
// $num_rvs  — number of random variables (documentation only).
// $d_param  — one const-generic dimension parameter per variable.
// $d_idx    — the matching index of each variable inside `self.data`.
//
// Local MI is computed as ln p(joint) - sum_k ln p(marginal_k), with zero
// densities skipped rather than producing -inf. NOTE(review): unlike the
// TE/CTE estimators above, a zero marginal here is silently dropped from the
// sum instead of zeroing the whole local value; in practice KDE densities at
// sample points are strictly positive (each point counts itself), so the
// branch is defensive only — confirm before relying on degenerate inputs.
macro_rules! impl_kernel_mi {
    ($name:ident, $num_rvs:expr, ($($d_param:ident),*), ($($d_idx:expr),*)) => {
        #[doc = concat!("Kernel-based mutual information estimator for ", stringify!($num_rvs), " random variables")]
        pub struct $name<const D_JOINT: usize, $(const $d_param: usize),*> {
            pub kernel_type: String,
            pub bandwidth: f64,
            pub data: Vec<Array2<f64>>,
            pub force_cpu: bool,
        }
        impl<const D_JOINT: usize, $(const $d_param: usize),*> $name<D_JOINT, $($d_param),*> {
            // Takes owned copies of all variable series.
            pub fn new(series: &[Array2<f64>], kernel_type: String, bandwidth: f64) -> Self {
                Self {
                    kernel_type,
                    bandwidth,
                    data: series.to_vec(),
                    force_cpu: false,
                }
            }
            // Forces CPU code paths (relevant only with the "gpu" feature).
            pub fn set_force_cpu(&mut self, force_cpu: bool) {
                self.force_cpu = force_cpu;
            }
        }
        impl<const D_JOINT: usize, $(const $d_param: usize),*> GlobalValue for $name<D_JOINT, $($d_param),*> {
            // Global MI = mean of the local MI values (0.0 on empty input).
            fn global_value(&self) -> f64 {
                self.local_values().mean().unwrap_or(0.0)
            }
        }
        impl<const D_JOINT: usize, $(const $d_param: usize),*> OptionalLocalValues for $name<D_JOINT, $($d_param),*> {
            fn supports_local(&self) -> bool { true }
            fn local_values_opt(&self) -> Result<Array1<f64>, &'static str> {
                Ok(self.local_values())
            }
        }
        impl<const D_JOINT: usize, $(const $d_param: usize),*> MutualInformationEstimator for $name<D_JOINT, $($d_param),*> {}
        impl<const D_JOINT: usize, $(const $d_param: usize),*> LocalValues for $name<D_JOINT, $($d_param),*> {
            fn local_values(&self) -> Array1<f64> {
                // Column-stack all variables for the joint density estimate.
                let joint_data = concatenate(
                    Axis(1),
                    &self.data.iter().map(|d| d.view()).collect::<Vec<_>>(),
                ).unwrap();
                let mut joint_est = KernelEntropy::<D_JOINT>::new_with_kernel_type(
                    joint_data,
                    self.kernel_type.clone(),
                    self.bandwidth,
                );
                joint_est.set_force_cpu(self.force_cpu);
                let joint_density = joint_est.kde_probability_density();
                // One marginal density vector per variable, expanded per $d_param.
                let mut marginal_densities = Vec::new();
                $(
                    let mut m_est = KernelEntropy::<$d_param>::new_with_kernel_type(
                        self.data[$d_idx].clone(),
                        self.kernel_type.clone(),
                        self.bandwidth
                    );
                    m_est.set_force_cpu(self.force_cpu);
                    marginal_densities.push(m_est.kde_probability_density());
                )*
                let n = joint_density.len();
                let mut local_mi = Array1::zeros(n);
                for i in 0..n {
                    let mut sum_log_p_marginals = 0.0;
                    for densities in &marginal_densities {
                        if densities[i] > 0.0 {
                            sum_log_p_marginals += densities[i].ln();
                        }
                    }
                    if joint_density[i] > 0.0 {
                        local_mi[i] = joint_density[i].ln() - sum_log_p_marginals;
                    }
                }
                local_mi
            }
        }
    };
}
// Concrete kernel MI estimators for 2..=6 variables. The index tuple pairs
// each const dimension parameter with its variable's position in `data`.
impl_kernel_mi!(KernelMutualInformation2, 2, (D1, D2), (0, 1));
impl_kernel_mi!(KernelMutualInformation3, 3, (D1, D2, D3), (0, 1, 2));
impl_kernel_mi!(KernelMutualInformation4, 4, (D1, D2, D3, D4), (0, 1, 2, 3));
impl_kernel_mi!(
    KernelMutualInformation5,
    5,
    (D1, D2, D3, D4, D5),
    (0, 1, 2, 3, 4)
);
impl_kernel_mi!(
    KernelMutualInformation6,
    6,
    (D1, D2, D3, D4, D5, D6),
    (0, 1, 2, 3, 4, 5)
);
/// Kernel-density estimator of conditional mutual information I(X1; X2 | Z).
///
/// Const parameters give the dimensionalities of the two variables, the
/// condition, the full joint, and the two variable+condition joints.
/// NOTE(review): unlike the TE/MI estimators in this file, this type has no
/// `force_cpu` flag, so GPU dispatch inside `KernelEntropy` cannot be
/// suppressed here — confirm whether that asymmetry is intentional.
pub struct KernelConditionalMutualInformation<
    const D1: usize,
    const D2: usize,
    const D_COND: usize,
    const D_JOINT: usize,
    const D1_COND: usize,
    const D2_COND: usize,
> {
    // Kernel name; compared case-insensitively downstream.
    pub kernel_type: String,
    // Kernel bandwidth; must be > 0.
    pub bandwidth: f64,
    // The two variable series [X1, X2].
    pub series: Vec<Array2<f64>>,
    // The conditioning series Z.
    pub cond: Array2<f64>,
}
impl<
    const D1: usize,
    const D2: usize,
    const D_COND: usize,
    const D_JOINT: usize,
    const D1_COND: usize,
    const D2_COND: usize,
> KernelConditionalMutualInformation<D1, D2, D_COND, D_JOINT, D1_COND, D2_COND>
{
    /// Creates a new kernel CMI estimator over `series` = [X1, X2] conditioned
    /// on `cond`, taking owned copies of all inputs.
    ///
    /// Fix: the `kernel_type`/`bandwidth` parameters previously carried the
    /// leading-underscore "unused" convention even though both are stored and
    /// used — renamed to match their real role (Rust has no named arguments,
    /// so callers are unaffected).
    pub fn new(
        series: &[Array2<f64>],
        cond: &Array2<f64>,
        kernel_type: String,
        bandwidth: f64,
    ) -> Self {
        Self {
            kernel_type,
            bandwidth,
            series: series.to_vec(),
            cond: cond.clone(),
        }
    }
}
impl<
    const D1: usize,
    const D2: usize,
    const D_COND: usize,
    const D_JOINT: usize,
    const D1_COND: usize,
    const D2_COND: usize,
> GlobalValue for KernelConditionalMutualInformation<D1, D2, D_COND, D_JOINT, D1_COND, D2_COND>
{
    /// Global CMI is the average of the local CMI values (0.0 on empty input).
    fn global_value(&self) -> f64 {
        let locals = self.local_values();
        locals.mean().unwrap_or(0.0)
    }
}
impl<
    const D1: usize,
    const D2: usize,
    const D_COND: usize,
    const D_JOINT: usize,
    const D1_COND: usize,
    const D2_COND: usize,
> OptionalLocalValues
    for KernelConditionalMutualInformation<D1, D2, D_COND, D_JOINT, D1_COND, D2_COND>
{
    /// Kernel CMI always supports per-sample local values.
    fn supports_local(&self) -> bool {
        true
    }
    /// Infallible here: delegates straight to `LocalValues::local_values`.
    fn local_values_opt(&self) -> Result<Array1<f64>, &'static str> {
        Ok(self.local_values())
    }
}
// Marker-style impl: `ConditionalMutualInformationEstimator` appears to
// require nothing beyond the impls above (any methods come from defaults).
impl<
    const D1: usize,
    const D2: usize,
    const D_COND: usize,
    const D_JOINT: usize,
    const D1_COND: usize,
    const D2_COND: usize,
> ConditionalMutualInformationEstimator
    for KernelConditionalMutualInformation<D1, D2, D_COND, D_JOINT, D1_COND, D2_COND>
{
}
impl<
    const D1: usize,
    const D2: usize,
    const D_COND: usize,
    const D_JOINT: usize,
    const D1_COND: usize,
    const D2_COND: usize,
> LocalValues for KernelConditionalMutualInformation<D1, D2, D_COND, D_JOINT, D1_COND, D2_COND>
{
    /// Local CMI per sample: ln( p(x1,x2,z) * p(z) / (p(x1,z) * p(x2,z)) ),
    /// or 0.0 wherever any estimated density vanishes.
    fn local_values(&self) -> Array1<f64> {
        // Column-stack [X1, X2, Z] for the full joint density.
        let mut views: Vec<_> = self.series.iter().map(|s| s.view()).collect();
        views.push(self.cond.view());
        let joint = concatenate(Axis(1), &views).unwrap();
        let p_joint_all = KernelEntropy::<D_JOINT>::new_with_kernel_type(
            joint,
            self.kernel_type.clone(),
            self.bandwidth,
        )
        .kde_probability_density();
        let p_cond = KernelEntropy::<D_COND>::new_with_kernel_type(
            self.cond.clone(),
            self.kernel_type.clone(),
            self.bandwidth,
        )
        .kde_probability_density();
        let x1_z = concatenate(Axis(1), &[self.series[0].view(), self.cond.view()]).unwrap();
        let p_marg1 = KernelEntropy::<D1_COND>::new_with_kernel_type(
            x1_z,
            self.kernel_type.clone(),
            self.bandwidth,
        )
        .kde_probability_density();
        let x2_z = concatenate(Axis(1), &[self.series[1].view(), self.cond.view()]).unwrap();
        let p_marg2 = KernelEntropy::<D2_COND>::new_with_kernel_type(
            x2_z,
            self.kernel_type.clone(),
            self.bandwidth,
        )
        .kde_probability_density();
        let locals: Vec<f64> = (0..p_joint_all.len())
            .map(|i| {
                let denominator = p_marg1[i] * p_marg2[i];
                // All factors must be strictly positive, otherwise the local
                // value stays zero (same guard as the original).
                if p_joint_all[i] > 0.0 && p_cond[i] > 0.0 && denominator > 0.0 {
                    ((p_joint_all[i] * p_cond[i]) / denominator).ln()
                } else {
                    0.0
                }
            })
            .collect();
        Array1::from(locals)
    }
}
/// Input wrapper accepted by `KernelEntropy` constructors: either a single
/// 1-D series (only valid when K == 1) or an (n_samples x K) matrix.
pub enum KernelData {
    OneDimensional(Array1<f64>),
    TwoDimensional(Array2<f64>),
}
impl From<Array1<f64>> for KernelData {
fn from(array: Array1<f64>) -> Self {
KernelData::OneDimensional(array)
}
}
impl From<Array2<f64>> for KernelData {
fn from(array: Array2<f64>) -> Self {
KernelData::TwoDimensional(array)
}
}
/// Core K-dimensional kernel density / entropy estimator.
///
/// Holds the sample points, a k-d tree for range queries, and precomputed
/// statistics used by the box and Gaussian kernels.
pub struct KernelEntropy<const K: usize> {
    // Sample points as fixed-size arrays (one per row of the input).
    pub points: Vec<[f64; K]>,
    // Number of samples (== points.len()).
    pub n_samples: usize,
    // Lower-cased kernel name; "gaussian" selects the Gaussian path, anything
    // else the box kernel.
    pub kernel_type: String,
    // Kernel bandwidth (> 0, asserted at construction).
    pub bandwidth: f64,
    // Immutable k-d tree over `points` for radius queries.
    pub tree: ImmutableKdTree<f64, K>,
    // Per-dimension sample standard deviations.
    pub std_devs: [f64; K],
    // Cholesky factor of the bandwidth-scaled covariance (Gaussian kernel only).
    pub cholesky_factor: Option<Array2<f64>>,
    // Inverse of the bandwidth-scaled covariance (Gaussian kernel only).
    pub precision_matrix: Option<Array2<f64>>,
    // Largest eigenvalue of the scaled covariance (Gaussian) or squared
    // bandwidth-scaled max std-dev (box); drives the adaptive search radius.
    pub max_eigenvalue: f64,
    // When true, GPU dispatch in `kde_probability_density` is skipped.
    pub force_cpu: bool,
}
impl<const K: usize> CrossEntropy for KernelEntropy<K> {
    /// Estimates cross entropy H(p, q) = -(1/n_p) * sum_i ln q(x_i), where the
    /// query points x_i are `self`'s samples (p) and the density q is the
    /// kernel density estimate held by `other`.
    ///
    /// Query points where q evaluates to zero are skipped (they contribute
    /// nothing to the sum) rather than producing -inf.
    ///
    /// Fix: removed the dead `else { sum_ln_q += 0.0; }` branch, a no-op add.
    fn cross_entropy(&self, other: &KernelEntropy<K>) -> f64 {
        let mut sum_ln_q = 0.0;
        let n_q = other.n_samples as f64;
        let bw = other.bandwidth;
        // For the Gaussian kernel, precompute the normalization constant
        // n_q * (2*pi)^(K/2) * sqrt(det(Sigma_bw)) and the squared search
        // radius used to truncate the tree query (scaled by the largest
        // eigenvalue; tighter for large sample counts).
        let (normalization_q, adaptive_radius_q) = if other.kernel_type == "gaussian" {
            // det(L L^T) = (prod diag(L))^2 when a Cholesky factor exists;
            // otherwise fall back to a diagonal covariance from the std-devs.
            let det_scaled_cov_q = if let Some(ref l) = other.cholesky_factor {
                let diag_prod: f64 = l.diag().iter().product();
                diag_prod * diag_prod
            } else {
                other.std_devs.iter().map(|&s| (bw * s).powi(2)).product()
            };
            let norm =
                n_q * (2.0 * std::f64::consts::PI).powf(K as f64 / 2.0) * det_scaled_cov_q.sqrt();
            let radius = if other.n_samples > 5000 {
                36.0 * other.max_eigenvalue
            } else {
                64.0 * other.max_eigenvalue
            };
            (norm, radius)
        } else {
            // Unused placeholders for the box-kernel path.
            (0.0, 0.0)
        };
        for query_point in &self.points {
            let density = if other.kernel_type == "gaussian" {
                // Sum truncated Gaussian contributions from neighbors inside
                // the adaptive radius: squared-Euclidean prefilter via the
                // tree, Mahalanobis distance for the kernel argument.
                let mut local_density = 0.0;
                let neighbors = other
                    .tree
                    .within_unsorted::<SquaredEuclidean>(query_point, adaptive_radius_q);
                for neighbor in neighbors {
                    let neighbor_point = &other.points[neighbor.item as usize];
                    let dist_sq = other.calculate_mahalanobis_distance(query_point, neighbor_point);
                    local_density += (-0.5 * dist_sq).exp();
                }
                local_density / normalization_q
            } else {
                // Box kernel: count points inside the axis-aligned box of
                // half-width r around the query, prefiltered by the box's
                // circumscribed sphere (radius^2 = K * r_eps^2).
                let r = bw / 2.0;
                let r_eps = r + 1e-15;
                let circumscribed_radius_sq = (K as f64) * r_eps * r_eps;
                let candidates = other
                    .tree
                    .within_unsorted::<SquaredEuclidean>(query_point, circumscribed_radius_sq);
                let mut count = 0usize;
                for candidate in candidates {
                    let p = &other.points[candidate.item as usize];
                    if other.is_in_box(query_point, p, r_eps) {
                        count += 1;
                    }
                }
                let vol = bw.powi(K as i32);
                (count as f64) / (n_q * vol)
            };
            if density > 0.0 {
                sum_ln_q += density.ln();
            }
        }
        -sum_ln_q / (self.n_samples as f64)
    }
}
impl<const K: usize> JointEntropy for KernelEntropy<K> {
    type Source = Array1<f64>;
    type Params = (String, f64);
    /// Stacks K univariate series column-wise into an (n x K) matrix and
    /// returns the global kernel entropy; `params` is (kernel_type, bandwidth).
    fn joint_entropy(series: &[Self::Source], params: Self::Params) -> f64 {
        assert_eq!(
            series.len(),
            K,
            "Number of series must match dimensionality K"
        );
        // Only reachable when K == 0; avoids indexing series[0] below.
        if series.is_empty() {
            return 0.0;
        }
        let n_samples = series[0].len();
        for s in series {
            assert_eq!(s.len(), n_samples, "All series must have the same length");
        }
        // Column j of the data matrix is series[j].
        let data = Array2::from_shape_fn((n_samples, K), |(row, col)| series[col][row]);
        let estimator = KernelEntropy::<K>::new_with_kernel_type(data, params.0, params.1);
        GlobalValue::global_value(&estimator)
    }
}
impl<const K: usize> KernelEntropy<K> {
    /// Convenience constructor using the default "box" kernel.
    pub fn new(data: impl Into<KernelData>, bandwidth: f64) -> Self {
        Self::new_with_kernel_type(data, "box".to_string(), bandwidth)
    }
    /// Builds the estimator: converts the input into fixed-size points, builds
    /// a k-d tree over them, and precomputes per-dimension std-devs plus (for
    /// the Gaussian kernel) the bandwidth-scaled covariance's Cholesky factor,
    /// precision matrix, and largest eigenvalue.
    ///
    /// Panics if `bandwidth <= 0`, if the input's width does not equal K, or
    /// (Gaussian only) if the covariance is not positive definite.
    pub fn new_with_kernel_type(
        data: impl Into<KernelData>,
        kernel_type: String,
        bandwidth: f64,
    ) -> Self {
        let data = data.into();
        // Kernel names are matched case-insensitively elsewhere in this file.
        let kernel_type = kernel_type.to_lowercase();
        assert!(bandwidth > 0.0);
        // Input width must agree with the const dimensionality K.
        assert!(match &data {
            KernelData::OneDimensional(_) => K == 1,
            KernelData::TwoDimensional(d) => d.ncols() == K,
        });
        // Flatten rows into fixed-size [f64; K] points.
        let points: Vec<[f64; K]> = match &data {
            KernelData::OneDimensional(arr) => {
                arr.as_slice()
                    .expect("Array must be contiguous")
                    .chunks(1)
                    .map(|chunk| {
                        let mut point = [0.0; K];
                        point[0] = chunk[0];
                        point
                    })
                    .collect()
            }
            KernelData::TwoDimensional(arr) => {
                // Fast path for contiguous standard-layout arrays; fall back
                // to row iteration for non-contiguous views.
                if let Some(slice) = arr.as_slice() {
                    slice
                        .chunks(K)
                        .map(|chunk| {
                            let mut point = [0.0; K];
                            point.copy_from_slice(&chunk[..K]);
                            point
                        })
                        .collect()
                } else {
                    arr.rows()
                        .into_iter()
                        .map(|row| {
                            let mut point = [0.0; K];
                            for (i, &val) in row.iter().enumerate() {
                                point[i] = val;
                            }
                            point
                        })
                        .collect()
                }
            }
        };
        let n_samples = points.len();
        let tree = ImmutableKdTree::new_from_slice(&points);
        let mut std_devs = [0.0; K];
        let mut cholesky_factor = None;
        let mut precision_matrix = None;
        // Welford's online algorithm: numerically stable per-dimension sample
        // variance in one pass.
        for dim in 0..K {
            let mut mean = 0.0;
            let mut m2 = 0.0;
            let mut count = 0.0;
            for point in &points {
                count += 1.0;
                let delta = point[dim] - mean;
                mean += delta / count;
                let delta2 = point[dim] - mean;
                m2 += delta * delta2;
            }
            // ddof = 1 (sample variance); 0.0 for fewer than two samples.
            let variance = if count < 2.0 { 0.0 } else { m2 / (count - 1.0) };
            std_devs[dim] = variance.sqrt();
        }
        let max_eigenvalue = if kernel_type == "gaussian" {
            // Lay the data out as K rows x n_samples columns for `cov`
            // (variables in rows).
            let mut data_for_cov = Array2::<f64>::zeros((K, n_samples));
            for (i, point) in points.iter().enumerate() {
                for dim in 0..K {
                    data_for_cov[[dim, i]] = point[dim];
                }
            }
            // cov(1.0): sample covariance with ddof = 1.
            let cov = data_for_cov
                .view()
                .cov(1.0)
                .expect("Failed to calculate covariance matrix");
            // Scale the covariance by bandwidth^2 — this is the kernel's Sigma.
            let scaled_cov = cov * (bandwidth * bandwidth);
            let l = scaled_cov.cholesky(UPLO::Lower)
                .expect("Covariance matrix must be positive definite. Check for redundant dimensions or duplicate data.");
            cholesky_factor = Some(l);
            let inv = scaled_cov
                .inv()
                .expect("Failed to invert covariance matrix");
            precision_matrix = Some(inv);
            use ndarray_linalg::EigValsh;
            // Largest eigenvalue bounds the kernel's spatial extent; used to
            // size the truncated neighbor-search radius.
            let eigenvalues = scaled_cov
                .eigvalsh(UPLO::Lower)
                .expect("Failed to calculate eigenvalues");
            eigenvalues
                .iter()
                .cloned()
                .fold(f64::NEG_INFINITY, f64::max)
        } else {
            // Box kernel: use the squared bandwidth-scaled max std-dev instead.
            let max_std = std_devs.iter().cloned().fold(0.0f64, f64::max);
            (max_std * bandwidth).powi(2)
        };
        Self {
            points,
            n_samples,
            kernel_type,
            bandwidth,
            tree,
            std_devs,
            cholesky_factor,
            precision_matrix,
            max_eigenvalue,
            force_cpu: false,
        }
    }
    /// Explicit 1-D constructor (K must be 1).
    pub fn new_1d(data: Array1<f64>, kernel_type: String, bandwidth: f64) -> Self {
        Self::new_with_kernel_type(KernelData::OneDimensional(data), kernel_type, bandwidth)
    }
    /// Explicit 2-D constructor.
    pub fn new_2d(data: Array2<f64>, kernel_type: String, bandwidth: f64) -> Self {
        Self::new_with_kernel_type(KernelData::TwoDimensional(data), kernel_type, bandwidth)
    }
    /// Forces CPU code paths (relevant only when the "gpu" feature is enabled).
    pub fn set_force_cpu(&mut self, force_cpu: bool) {
        self.force_cpu = force_cpu;
    }
    /// True if `p` lies inside the axis-aligned box of half-width `r_eps`
    /// centered on `query_point` (Chebyshev / L-infinity test).
    #[inline(always)]
    pub(crate) fn is_in_box(&self, query_point: &[f64; K], p: &[f64; K], r_eps: f64) -> bool {
        for dim in 0..K {
            if (query_point[dim] - p[dim]).abs() > r_eps {
                return false;
            }
        }
        true
    }
    /// Squared Mahalanobis distance between `p1` and `p2` under the
    /// bandwidth-scaled covariance.
    ///
    /// With a Cholesky factor L, solves L z = (p1 - p2) by forward
    /// substitution and returns |z|^2 = diff^T Sigma^-1 diff. Without one
    /// (box kernel / no covariance), falls back to a diagonal scaling by
    /// bandwidth * std_dev per dimension (bandwidth alone for zero-variance
    /// dimensions).
    pub fn calculate_mahalanobis_distance(&self, p1: &[f64; K], p2: &[f64; K]) -> f64 {
        if let Some(ref l) = self.cholesky_factor {
            let mut diff = [0.0; K];
            for dim in 0..K {
                diff[dim] = p1[dim] - p2[dim];
            }
            let mut z = [0.0; K];
            for i in 0..K {
                let mut sum = 0.0;
                for j in 0..i {
                    sum += l[[i, j]] * z[j];
                }
                z[i] = (diff[i] - sum) / l[[i, i]];
            }
            z.iter().map(|&val| val * val).sum()
        } else {
            let mut sum = 0.0;
            for dim in 0..K {
                let scale = self.bandwidth * self.std_devs[dim];
                if scale > 0.0 {
                    let diff = (p1[dim] - p2[dim]) / scale;
                    sum += diff * diff;
                } else {
                    let diff = (p1[dim] - p2[dim]) / self.bandwidth;
                    sum += diff * diff;
                }
            }
            sum
        }
    }
    /// Density of the KDE evaluated at each of its own sample points.
    ///
    /// With the "gpu" feature and `force_cpu == false`, large inputs are
    /// dispatched to the GPU kernels (box >= 2000 samples, gaussian >= 500);
    /// everything else runs on the CPU paths below.
    pub fn kde_probability_density(&self) -> Array1<f64> {
        #[cfg(feature = "gpu")]
        {
            if !self.force_cpu {
                if self.kernel_type == "box" && self.n_samples >= 2000 {
                    return self.box_kernel_density_gpu();
                } else if self.kernel_type == "gaussian" && self.n_samples >= 500 {
                    return self.gaussian_kernel_density_gpu();
                }
            }
        }
        if self.kernel_type == "box" {
            self.box_kernel_density_cpu()
        } else {
            self.gaussian_kernel_density_cpu()
        }
    }
    /// CPU box-kernel density: for each point, count neighbors inside the
    /// axis-aligned box of half-width bandwidth/2 (sphere prefilter via the
    /// k-d tree) and divide by n * bandwidth^K.
    pub fn box_kernel_density_cpu(&self) -> Array1<f64> {
        let volume = self.bandwidth.powi(K as i32);
        let n_volume = self.n_samples as f64 * volume;
        let mut densities = Array1::<f64>::zeros(self.n_samples);
        let r = self.bandwidth / 2.0;
        // Small epsilon keeps points exactly on the box edge inside.
        let r_eps = r + 1e-15;
        // Squared radius of the sphere circumscribing the box: K * r_eps^2.
        let circumscribed_radius_sq = (K as f64) * r_eps * r_eps;
        for (i, query_point) in self.points.iter().enumerate() {
            let candidates = self
                .tree
                .within_unsorted::<SquaredEuclidean>(query_point, circumscribed_radius_sq);
            let mut count = 0usize;
            for candidate in candidates {
                let p = &self.points[candidate.item as usize];
                if self.is_in_box(query_point, p, r_eps) {
                    count += 1;
                }
            }
            densities[i] = count as f64 / n_volume;
        }
        densities
    }
    /// CPU Gaussian-kernel density with a full (scaled) covariance: truncated
    /// neighbor sums of exp(-d_M^2 / 2), normalized by
    /// n * (2*pi)^(K/2) * sqrt(det(Sigma_bw)).
    pub fn gaussian_kernel_density_cpu(&self) -> Array1<f64> {
        let n = self.n_samples as f64;
        let bw = self.bandwidth;
        // det(L L^T) = (prod diag(L))^2; diagonal fallback without a factor.
        let det_scaled_cov = if let Some(ref l) = self.cholesky_factor {
            let diag_prod: f64 = l.diag().iter().product();
            diag_prod * diag_prod
        } else {
            self.std_devs.iter().map(|&s| (bw * s).powi(2)).product()
        };
        let normalization =
            n * (2.0 * std::f64::consts::PI).powf(K as f64 / 2.0) * det_scaled_cov.sqrt();
        let mut densities = Array1::<f64>::zeros(self.n_samples);
        // Truncation radius scales with the covariance's largest eigenvalue;
        // tighter (36x) for large samples, looser (64x) otherwise.
        let adaptive_radius = if self.n_samples > 5000 {
            36.0 * self.max_eigenvalue
        } else {
            64.0 * self.max_eigenvalue
        };
        for (i, query_point) in self.points.iter().enumerate() {
            let candidates = self
                .tree
                .within_unsorted::<SquaredEuclidean>(query_point, adaptive_radius);
            let mut sum_k = 0.0;
            for candidate in candidates {
                let p = &self.points[candidate.item as usize];
                let dist_sq = self.calculate_mahalanobis_distance(query_point, p);
                // Optional polynomial exp approximation behind a feature flag.
                #[cfg(feature = "fast_exp")]
                {
                    sum_k += self.fast_exp(-0.5 * dist_sq);
                }
                #[cfg(not(feature = "fast_exp"))]
                {
                    sum_k += (-0.5 * dist_sq).exp();
                }
            }
            densities[i] = sum_k / normalization;
        }
        densities
    }
    /// Local entropy values -ln p(x_i) via the (possibly GPU) density path;
    /// zero-density points map to 0.0.
    pub fn box_kernel_local_values(&self) -> Array1<f64> {
        let densities = self.kde_probability_density();
        densities.mapv(|d| if d > 0.0 { -d.ln() } else { 0.0 })
    }
    /// Same as `box_kernel_local_values`; dispatch on `kernel_type` happens
    /// inside `kde_probability_density`.
    pub fn gaussian_kernel_local_values(&self) -> Array1<f64> {
        let densities = self.kde_probability_density();
        densities.mapv(|d| if d > 0.0 { -d.ln() } else { 0.0 })
    }
    /// CPU-only variant of the box local values (bypasses GPU dispatch).
    pub fn box_kernel_local_values_cpu(&self) -> Array1<f64> {
        let densities = self.box_kernel_density_cpu();
        densities.mapv(|d| if d > 0.0 { -d.ln() } else { 0.0 })
    }
    /// CPU-only variant of the Gaussian local values (bypasses GPU dispatch).
    pub fn gaussian_kernel_local_values_cpu(&self) -> Array1<f64> {
        let densities = self.gaussian_kernel_density_cpu();
        densities.mapv(|d| if d > 0.0 { -d.ln() } else { 0.0 })
    }
    /// Legacy box-kernel local values.
    ///
    /// NOTE(review): the batch-of-4 structure does identical per-point work to
    /// the flat loop that follows it (likely a vestige of a previous
    /// vectorized implementation) — kept as-is for backwards comparison.
    pub fn box_kernel_local_values_legacy(&self) -> Array1<f64> {
        let volume = self.bandwidth.powi(K as i32);
        let n_volume = self.n_samples as f64 * volume;
        let mut local_values = Array1::<f64>::zeros(self.n_samples);
        let batch_size = 4;
        let num_batches = self.n_samples / batch_size;
        for batch in 0..num_batches {
            let start_idx = batch * batch_size;
            let r = self.bandwidth / 2.0;
            let r_eps = r + 1e-15;
            let circumscribed_radius_sq = (K as f64) * r_eps * r_eps;
            for i in 0..batch_size {
                let idx = start_idx + i;
                let query_point = &self.points[idx];
                let candidates = self
                    .tree
                    .within_unsorted::<SquaredEuclidean>(query_point, circumscribed_radius_sq);
                let mut count = 0usize;
                for candidate in candidates {
                    let p = &self.points[candidate.item as usize];
                    if self.is_in_box(query_point, p, r_eps) {
                        count += 1;
                    }
                }
                local_values[idx] = count as f64;
            }
        }
        // Remainder loop for the samples not covered by full batches.
        let r = self.bandwidth / 2.0;
        let r_eps = r + 1e-15;
        let circumscribed_radius_sq = (K as f64) * r_eps * r_eps;
        for i in (num_batches * batch_size)..self.n_samples {
            let query_point = &self.points[i];
            let candidates = self
                .tree
                .within_unsorted::<SquaredEuclidean>(query_point, circumscribed_radius_sq);
            let mut count = 0usize;
            for candidate in candidates {
                let p = &self.points[candidate.item as usize];
                if self.is_in_box(query_point, p, r_eps) {
                    count += 1;
                }
            }
            local_values[i] = count as f64;
        }
        // Convert raw counts to -ln(count / (n * volume)) in place.
        local_values.mapv_inplace(|x| if x > 0.0 { -(x / n_volume).ln() } else { 0.0 });
        local_values
    }
    /// Piecewise polynomial approximation of e^x:
    /// clamps at +/-700, a 5th-order Taylor series for x > -0.5, and the
    /// reciprocal of a truncated e^{-x} series for more negative arguments.
    #[cfg(feature = "fast_exp")]
    fn fast_exp(&self, x: f64) -> f64 {
        if x < -700.0 {
            return 0.0;
        }
        if x > 700.0 {
            return f64::INFINITY;
        }
        if x > -0.5 {
            return 1.0
                + x * (1.0 + x * (0.5 + x * (1.0 / 6.0 + x * (1.0 / 24.0 + x * (1.0 / 120.0)))));
        }
        if x > -2.5 {
            return 1.0 / (1.0 - x + x * x / 2.0 - x * x * x / 6.0);
        }
        1.0 / (1.0 - x + x * x / 2.0 - x * x * x / 6.0 + x * x * x * x / 24.0
            - x * x * x * x * x / 120.0
            + x * x * x * x * x * x / 720.0)
    }
}
impl<const K: usize> GlobalValue for KernelEntropy<K> {
    /// Global entropy derived from the local values via the trait-provided
    /// `global_from_local` helper (defined in the traits module).
    fn global_value(&self) -> f64 {
        self.global_from_local()
    }
}
impl<const K: usize> LocalValues for KernelEntropy<K> {
    /// Local entropy values: -ln p(x_i) per sample, 0.0 where the estimated
    /// density is zero.
    fn local_values(&self) -> Array1<f64> {
        self.kde_probability_density()
            .mapv(|density| if density > 0.0 { -density.ln() } else { 0.0 })
    }
}
#[cfg(test)]
mod tests {
    use crate::estimators::entropy::Entropy;
    use ndarray::Array2;
    // All tests below exercise `is_in_box` (Chebyshev box membership) at
    // various dimensionalities via the `Entropy::nd_kernel` factory. The
    // half-width is 0.2, so any coordinate difference > 0.2 is "outside".
    #[test]
    fn test_is_in_box_1d_inside() {
        let kernel = Entropy::nd_kernel::<1>(Array2::zeros((1, 1)), 1.0);
        let q = [0.0];
        let p = [0.1];
        assert!(kernel.is_in_box(&q, &p, 0.2));
    }
    #[test]
    fn test_is_in_box_1d_outside() {
        let kernel = Entropy::nd_kernel::<1>(Array2::zeros((1, 1)), 1.0);
        let q = [0.0];
        let p = [0.3];
        assert!(!kernel.is_in_box(&q, &p, 0.2));
    }
    #[test]
    fn test_is_in_box_2d_inside() {
        let kernel = Entropy::nd_kernel::<2>(Array2::zeros((1, 2)), 1.0);
        let q = [0.0, 0.0];
        let p = [0.1, 0.1];
        assert!(kernel.is_in_box(&q, &p, 0.2));
    }
    #[test]
    fn test_is_in_box_2d_outside_x() {
        // One coordinate out of range is enough to reject.
        let kernel = Entropy::nd_kernel::<2>(Array2::zeros((1, 2)), 1.0);
        let q = [0.0, 0.0];
        let p = [0.21, 0.1];
        assert!(!kernel.is_in_box(&q, &p, 0.2));
    }
    #[test]
    fn test_is_in_box_2d_outside_y() {
        let kernel = Entropy::nd_kernel::<2>(Array2::zeros((1, 2)), 1.0);
        let q = [0.0, 0.0];
        let p = [0.1, 0.21];
        assert!(!kernel.is_in_box(&q, &p, 0.2));
    }
    #[test]
    fn test_is_in_box_4d_inside() {
        let kernel = Entropy::nd_kernel::<4>(Array2::zeros((1, 4)), 1.0);
        let q = [0.0, 0.0, 0.0, 0.0];
        let p = [0.1, 0.1, 0.1, 0.1];
        assert!(kernel.is_in_box(&q, &p, 0.2));
    }
    #[test]
    fn test_is_in_box_4d_outside() {
        let kernel = Entropy::nd_kernel::<4>(Array2::zeros((1, 4)), 1.0);
        let q = [0.0, 0.0, 0.0, 0.0];
        let p = [0.1, 0.3, 0.1, 0.1];
        assert!(!kernel.is_in_box(&q, &p, 0.2));
    }
    #[test]
    fn test_is_in_box_8d_inside() {
        let kernel = Entropy::nd_kernel::<8>(Array2::zeros((1, 8)), 1.0);
        let q = [0.0; 8];
        let p = [0.1; 8];
        assert!(kernel.is_in_box(&q, &p, 0.2));
    }
    #[test]
    fn test_is_in_box_8d_outside() {
        let kernel = Entropy::nd_kernel::<8>(Array2::zeros((1, 8)), 1.0);
        let q = [0.0; 8];
        let p = [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.21];
        assert!(!kernel.is_in_box(&q, &p, 0.2));
    }
}