mod fft;
mod kde2d;
#[cfg(feature = "gpu")]
mod gpu;
use crate::common::{interquartile_range, standard_deviation};
use thiserror::Error;
pub use fft::kde_fft;
pub use kde2d::KernelDensity2D;
#[cfg(feature = "gpu")]
pub use gpu::kde_fft_gpu;
#[derive(Error, Debug)]
pub enum KdeError {
#[error("Empty data for KDE")]
EmptyData,
#[error("Insufficient data: need at least {min} points, got {actual}")]
InsufficientData { min: usize, actual: usize },
#[error("Statistics error: {0}")]
StatsError(String),
#[error("FFT error: {0}")]
FftError(String),
}
pub type KdeResult<T> = Result<T, KdeError>;
pub struct KernelDensity {
pub x: Vec<f64>,
pub y: Vec<f64>,
}
impl KernelDensity {
pub fn estimate(data: &[f64], adjust: f64, n_points: usize) -> KdeResult<Self> {
if data.is_empty() {
return Err(KdeError::EmptyData);
}
let clean_data: Vec<f64> = data.iter().filter(|x| x.is_finite()).copied().collect();
if clean_data.len() < 3 {
return Err(KdeError::InsufficientData {
min: 3,
actual: clean_data.len(),
});
}
let n = clean_data.len() as f64;
let std_dev = standard_deviation(&clean_data)
.map_err(|e| KdeError::StatsError(e))?;
let iqr = interquartile_range(&clean_data)
.map_err(|e| KdeError::StatsError(e))?;
let bw_factor = 0.9 * std_dev.min(iqr / 1.34) * n.powf(-0.2);
let bandwidth = bw_factor * adjust;
let data_min = clean_data.iter().cloned().fold(f64::INFINITY, f64::min);
let data_max = clean_data.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
let grid_min = data_min - 3.0 * bandwidth;
let grid_max = data_max + 3.0 * bandwidth;
let x: Vec<f64> = (0..n_points)
.map(|i| grid_min + (grid_max - grid_min) * (i as f64) / (n_points - 1) as f64)
.collect();
#[cfg(feature = "gpu")]
let y = if crate::gpu::is_gpu_available() {
kde_fft_gpu(&clean_data, &x, bandwidth, n)?
} else {
kde_fft(&clean_data, &x, bandwidth, n)?
};
#[cfg(not(feature = "gpu"))]
let y = kde_fft(&clean_data, &x, bandwidth, n)?;
Ok(KernelDensity { x, y })
}
pub fn find_peaks(&self, peak_removal: f64) -> Vec<f64> {
if self.y.len() < 3 {
return Vec::new();
}
let max_y = self.y.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
let threshold = peak_removal * max_y;
let mut peaks = Vec::new();
for i in 1..self.y.len() - 1 {
if self.y[i] > self.y[i - 1] && self.y[i] > self.y[i + 1] && self.y[i] > threshold {
peaks.push(self.x[i]);
}
}
if peaks.is_empty() {
if let Some((idx, _)) = self
.y
.iter()
.enumerate()
.max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
{
peaks.push(self.x[idx]);
}
}
peaks
}
pub fn density_at(&self, x: f64) -> f64 {
if self.x.is_empty() || self.y.is_empty() {
return 0.0;
}
if x <= self.x[0] {
return self.y[0];
}
if x >= self.x[self.x.len() - 1] {
return self.y[self.y.len() - 1];
}
let mut left_idx = 0;
let mut right_idx = self.x.len() - 1;
while right_idx - left_idx > 1 {
let mid = (left_idx + right_idx) / 2;
if self.x[mid] <= x {
left_idx = mid;
} else {
right_idx = mid;
}
}
let x0 = self.x[left_idx];
let x1 = self.x[right_idx];
let y0 = self.y[left_idx];
let y1 = self.y[right_idx];
if (x1 - x0).abs() < 1e-10 {
y0
} else {
y0 + (y1 - y0) * (x - x0) / (x1 - x0)
}
}
}