use crate::common::{gaussian_kernel, interquartile_range, standard_deviation};
use crate::kde::{KdeError, KdeResult};
use ndarray::Array2;
use realfft::RealFftPlanner;
use realfft::num_complex::Complex;
#[derive(Debug)]
pub struct KernelDensity2D {
pub x: Vec<f64>,
pub y: Vec<f64>,
pub z: Array2<f64>,
}
impl KernelDensity2D {
pub fn estimate(
data_x: &[f64],
data_y: &[f64],
adjust: f64,
n_points: usize,
) -> KdeResult<Self> {
if data_x.len() != data_y.len() {
return Err(KdeError::StatsError(
"X and Y data must have the same length".to_string(),
));
}
if data_x.is_empty() {
return Err(KdeError::EmptyData);
}
let mut clean_x = Vec::new();
let mut clean_y = Vec::new();
for i in 0..data_x.len() {
if data_x[i].is_finite() && data_y[i].is_finite() {
clean_x.push(data_x[i]);
clean_y.push(data_y[i]);
}
}
if clean_x.len() < 3 {
return Err(KdeError::InsufficientData {
min: 3,
actual: clean_x.len(),
});
}
let n = clean_x.len() as f64;
let std_dev_x = standard_deviation(&clean_x)
.map_err(|e| KdeError::StatsError(e))?;
let iqr_x = interquartile_range(&clean_x)
.map_err(|e| KdeError::StatsError(e))?;
let bw_factor_x = 0.9 * std_dev_x.min(iqr_x / 1.34) * n.powf(-0.2);
let bandwidth_x = bw_factor_x * adjust;
let std_dev_y = standard_deviation(&clean_y)
.map_err(|e| KdeError::StatsError(e))?;
let iqr_y = interquartile_range(&clean_y)
.map_err(|e| KdeError::StatsError(e))?;
let bw_factor_y = 0.9 * std_dev_y.min(iqr_y / 1.34) * n.powf(-0.2);
let bandwidth_y = bw_factor_y * adjust;
let x_min = clean_x.iter().cloned().fold(f64::INFINITY, f64::min);
let x_max = clean_x.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
let y_min = clean_y.iter().cloned().fold(f64::INFINITY, f64::min);
let y_max = clean_y.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
let x_grid_min = x_min - 3.0 * bandwidth_x;
let x_grid_max = x_max + 3.0 * bandwidth_x;
let y_grid_min = y_min - 3.0 * bandwidth_y;
let y_grid_max = y_max + 3.0 * bandwidth_y;
let x: Vec<f64> = (0..n_points)
.map(|i| {
x_grid_min + (x_grid_max - x_grid_min) * (i as f64) / (n_points - 1) as f64
})
.collect();
let y: Vec<f64> = (0..n_points)
.map(|i| {
y_grid_min + (y_grid_max - y_grid_min) * (i as f64) / (n_points - 1) as f64
})
.collect();
let z = kde2d_fft(&clean_x, &clean_y, &x, &y, bandwidth_x, bandwidth_y, n)?;
Ok(KernelDensity2D { x, y, z })
}
pub fn find_contour(&self, threshold: f64) -> Vec<(f64, f64)> {
let max_density = self.z.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
let density_threshold = threshold * max_density;
let mut contour_points = Vec::new();
for i in 0..self.x.len() {
for j in 0..self.y.len() {
if self.z[[i, j]] >= density_threshold {
let is_boundary = (i > 0 && self.z[[i - 1, j]] < density_threshold)
|| (i < self.x.len() - 1 && self.z[[i + 1, j]] < density_threshold)
|| (j > 0 && self.z[[i, j - 1]] < density_threshold)
|| (j < self.y.len() - 1 && self.z[[i, j + 1]] < density_threshold);
if is_boundary {
contour_points.push((self.x[i], self.y[j]));
}
}
}
}
contour_points
}
pub fn density_at(&self, x: f64, y: f64) -> f64 {
let x_idx = self.find_grid_index(&self.x, x);
let y_idx = self.find_grid_index(&self.y, y);
if x_idx >= self.x.len() || y_idx >= self.y.len() {
return 0.0;
}
let x0 = if x_idx > 0 { x_idx - 1 } else { 0 };
let x1 = x_idx.min(self.x.len() - 1);
let y0 = if y_idx > 0 { y_idx - 1 } else { 0 };
let y1 = y_idx.min(self.y.len() - 1);
let z00 = self.z[[x0, y0]];
let z01 = self.z[[x0, y1]];
let z10 = self.z[[x1, y0]];
let z11 = self.z[[x1, y1]];
let dx = if x1 > x0 {
(x - self.x[x0]) / (self.x[x1] - self.x[x0])
} else {
0.0
};
let dy = if y1 > y0 {
(y - self.y[y0]) / (self.y[y1] - self.y[y0])
} else {
0.0
};
z00 * (1.0 - dx) * (1.0 - dy)
+ z10 * dx * (1.0 - dy)
+ z01 * (1.0 - dx) * dy
+ z11 * dx * dy
}
fn find_grid_index(&self, grid: &[f64], value: f64) -> usize {
if value <= grid[0] {
return 0;
}
if value >= grid[grid.len() - 1] {
return grid.len() - 1;
}
let mut left = 0;
let mut right = grid.len() - 1;
while right - left > 1 {
let mid = (left + right) / 2;
if grid[mid] <= value {
left = mid;
} else {
right = mid;
}
}
left
}
}
fn kde2d_fft(
data_x: &[f64],
data_y: &[f64],
x_grid: &[f64],
y_grid: &[f64],
bandwidth_x: f64,
bandwidth_y: f64,
n: f64,
) -> KdeResult<Array2<f64>> {
let nx = x_grid.len();
let ny = y_grid.len();
if nx < 2 || ny < 2 {
return Err(KdeError::StatsError(
"Grid must have at least 2 points in each dimension".to_string(),
));
}
let x_spacing = (x_grid[nx - 1] - x_grid[0]) / (nx - 1) as f64;
let y_spacing = (y_grid[ny - 1] - y_grid[0]) / (ny - 1) as f64;
let mut binned = Array2::<f64>::zeros((nx, ny));
for (&x, &y) in data_x.iter().zip(data_y.iter()) {
let x_idx = ((x - x_grid[0]) / x_spacing).floor() as isize;
let y_idx = ((y - y_grid[0]) / y_spacing).floor() as isize;
if x_idx >= 0 && (x_idx as usize) < nx && y_idx >= 0 && (y_idx as usize) < ny {
binned[[x_idx as usize, y_idx as usize]] += 1.0;
}
}
let kernel_center_x = (nx - 1) as f64 / 2.0;
let kernel_center_y = (ny - 1) as f64 / 2.0;
let mut kernel = Array2::<f64>::zeros((nx, ny));
for i in 0..nx {
for j in 0..ny {
let grid_x = (i as f64 - kernel_center_x) * x_spacing;
let grid_y = (j as f64 - kernel_center_y) * y_spacing;
let u_x = grid_x / bandwidth_x;
let u_y = grid_y / bandwidth_y;
kernel[[i, j]] = gaussian_kernel(u_x) * gaussian_kernel(u_y);
}
}
let _fft_size_x = (2 * nx).next_power_of_two();
let _fft_size_y = (2 * ny).next_power_of_two();
let mut density = Array2::<f64>::zeros((nx, ny));
for j in 0..ny {
let mut row_binned = vec![0.0; nx];
let mut row_kernel = vec![0.0; nx];
for i in 0..nx {
row_binned[i] = binned[[i, j]];
row_kernel[i] = kernel[[i, j]];
}
let row_density = kde1d_row(&row_binned, &row_kernel, bandwidth_x, n)?;
for i in 0..nx {
density[[i, j]] = row_density[i];
}
}
for i in 0..nx {
let mut col_density = vec![0.0; ny];
for j in 0..ny {
col_density[j] = density[[i, j]];
}
for j in 0..ny {
let mut sum = 0.0;
let mut weight_sum = 0.0;
for k in 0..ny {
let dist = (j as f64 - k as f64) * y_spacing;
let weight = gaussian_kernel(dist / bandwidth_y);
sum += col_density[k] * weight;
weight_sum += weight;
}
if weight_sum > 0.0 {
density[[i, j]] = sum / weight_sum;
}
}
}
let total: f64 = density.iter().sum();
if total > 0.0 {
for val in density.iter_mut() {
*val /= total * x_spacing * y_spacing;
}
}
Ok(density)
}
fn kde1d_row(
binned: &[f64],
kernel: &[f64],
bandwidth: f64,
n: f64,
) -> KdeResult<Vec<f64>> {
let m = binned.len();
let fft_size = (2 * m).next_power_of_two();
let mut planner = RealFftPlanner::<f64>::new();
let r2c = planner.plan_fft_forward(fft_size);
let c2r = planner.plan_fft_inverse(fft_size);
let mut binned_padded = vec![0.0; fft_size];
binned_padded[..m].copy_from_slice(binned);
let mut kernel_padded = vec![0.0; fft_size];
let kernel_start = (fft_size - m) / 2;
let first_half = (m + 1) / 2;
kernel_padded[kernel_start..kernel_start + first_half]
.copy_from_slice(&kernel[m / 2..]);
let second_half = m / 2;
if second_half > 0 {
kernel_padded[..second_half].copy_from_slice(&kernel[..second_half]);
}
let mut binned_spectrum = r2c.make_output_vec();
r2c.process(&mut binned_padded, &mut binned_spectrum)
.map_err(|e| KdeError::FftError(format!("FFT forward failed: {}", e)))?;
let mut kernel_spectrum = r2c.make_output_vec();
r2c.process(&mut kernel_padded, &mut kernel_spectrum)
.map_err(|e| KdeError::FftError(format!("FFT forward failed: {}", e)))?;
let mut conv_spectrum: Vec<Complex<f64>> = binned_spectrum
.iter()
.zip(kernel_spectrum.iter())
.map(|(a, b)| a * b)
.collect();
let mut conv_result = c2r.make_output_vec();
c2r.process(&mut conv_spectrum, &mut conv_result)
.map_err(|e| KdeError::FftError(format!("FFT inverse failed: {}", e)))?;
let kernel_start = (fft_size - m) / 2;
let mut density = Vec::with_capacity(m);
for i in 0..m {
let idx = (kernel_start + i) % fft_size;
density.push(conv_result[idx] / (fft_size as f64 * n * bandwidth));
}
Ok(density)
}