Skip to main content

ggplot_rs/stat/
density.rs

1use crate::aes::Aesthetic;
2use crate::data::{DataFrame, Value};
3use crate::scale::ScaleSet;
4
5use super::Stat;
6
7/// Gaussian kernel density estimation with Silverman bandwidth.
8pub struct StatDensity {
9    pub n_points: usize,
10}
11
12impl Default for StatDensity {
13    fn default() -> Self {
14        StatDensity { n_points: 512 }
15    }
16}
17
18impl Stat for StatDensity {
19    fn compute_group(&self, data: &DataFrame, _scales: &ScaleSet) -> DataFrame {
20        let x_col = match data.column("x") {
21            Some(c) => c,
22            None => return DataFrame::new(),
23        };
24
25        let values: Vec<f64> = x_col.iter().filter_map(|v| v.as_f64()).collect();
26        if values.len() < 2 {
27            return DataFrame::new();
28        }
29
30        let n = values.len() as f64;
31        let mean = values.iter().sum::<f64>() / n;
32        let var = values.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / (n - 1.0);
33        let sd = var.sqrt();
34
35        // Silverman's rule of thumb
36        let bandwidth = 0.9 * sd.min(iqr(&values) / 1.34) * n.powf(-0.2);
37        let bandwidth = if bandwidth > 0.0 { bandwidth } else { sd * 0.5 };
38
39        let x_min = values.iter().cloned().fold(f64::INFINITY, f64::min) - 3.0 * bandwidth;
40        let x_max = values.iter().cloned().fold(f64::NEG_INFINITY, f64::max) + 3.0 * bandwidth;
41        let step = (x_max - x_min) / (self.n_points - 1) as f64;
42
43        let mut x_vals = Vec::with_capacity(self.n_points);
44        let mut y_vals = Vec::with_capacity(self.n_points);
45
46        for i in 0..self.n_points {
47            let x = x_min + i as f64 * step;
48            let density: f64 = values
49                .iter()
50                .map(|xi| gaussian_kernel((x - xi) / bandwidth))
51                .sum::<f64>()
52                / (n * bandwidth);
53
54            x_vals.push(Value::Float(x));
55            y_vals.push(Value::Float(density));
56        }
57
58        let mut result = DataFrame::new();
59        result.add_column("x".to_string(), x_vals);
60        result.add_column("y".to_string(), y_vals);
61
62        // Carry over grouping columns
63        for col_name in &["color", "fill", "group"] {
64            if let Some(col) = data.column(col_name) {
65                if let Some(first) = col.first() {
66                    result.add_column(col_name.to_string(), vec![first.clone(); self.n_points]);
67                }
68            }
69        }
70
71        result
72    }
73
74    fn required_aes(&self) -> Vec<Aesthetic> {
75        vec![Aesthetic::X]
76    }
77
78    fn name(&self) -> &str {
79        "density"
80    }
81}
82
83fn gaussian_kernel(x: f64) -> f64 {
84    (-(x * x) / 2.0).exp() / (2.0 * std::f64::consts::PI).sqrt()
85}
86
87fn iqr(values: &[f64]) -> f64 {
88    let mut sorted = values.to_vec();
89    sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
90    quantile_type7(&sorted, 0.75) - quantile_type7(&sorted, 0.25)
91}
92
93/// R-compatible type-7 quantile interpolation (R's default `quantile()` method).
94fn quantile_type7(sorted: &[f64], p: f64) -> f64 {
95    let n = sorted.len();
96    if n == 0 {
97        return 0.0;
98    }
99    if n == 1 {
100        return sorted[0];
101    }
102    let h = (n - 1) as f64 * p;
103    let lo = h.floor() as usize;
104    let hi = (lo + 1).min(n - 1);
105    let frac = h - lo as f64;
106    sorted[lo] + frac * (sorted[hi] - sorted[lo])
107}