Skip to main content

ggplot_rs/stat/
bin.rs

1use crate::aes::Aesthetic;
2use crate::data::{DataFrame, Value};
3use crate::scale::ScaleSet;
4
5use super::Stat;
6
/// Bins continuous x values into histogram bins.
///
/// The bin layout is controlled either by a fixed number of bins (`bins`)
/// or by an explicit bin width (`binwidth`).
#[derive(Debug, Clone, PartialEq)]
pub struct StatBin {
    /// Number of equal-width bins to use when `binwidth` is `None`.
    pub bins: usize,
    /// Explicit bin width; when `Some`, it takes precedence over `bins`.
    pub binwidth: Option<f64>,
}
12
13impl StatBin {
14    /// Set bin width (overrides bins count).
15    pub fn with_binwidth(mut self, width: f64) -> Self {
16        self.binwidth = Some(width);
17        self
18    }
19
20    /// Set number of bins.
21    pub fn with_bins(mut self, bins: usize) -> Self {
22        self.bins = bins;
23        self.binwidth = None;
24        self
25    }
26}
27
28impl Default for StatBin {
29    fn default() -> Self {
30        StatBin {
31            bins: 30,
32            binwidth: None,
33        }
34    }
35}
36
37impl Stat for StatBin {
38    fn compute_group(&self, data: &DataFrame, _scales: &ScaleSet) -> DataFrame {
39        let x_col = match data.column("x") {
40            Some(c) => c,
41            None => return DataFrame::new(),
42        };
43
44        let values: Vec<f64> = x_col.iter().filter_map(|v| v.as_f64()).collect();
45        if values.is_empty() {
46            return DataFrame::new();
47        }
48
49        let min = values.iter().cloned().fold(f64::INFINITY, f64::min);
50        let max = values.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
51
52        // Handle case where all values are the same
53        let (min, max) = if (max - min).abs() < f64::EPSILON {
54            (min - 0.5, max + 0.5)
55        } else {
56            (min, max)
57        };
58
59        // Determine bin width and count
60        let (bin_width, n_bins) = if let Some(bw) = self.binwidth {
61            let n = ((max - min) / bw).ceil() as usize;
62            (bw, n.max(1))
63        } else {
64            let bw = (max - min) / self.bins as f64;
65            (bw, self.bins)
66        };
67
68        let mut counts = vec![0usize; n_bins];
69
70        for &v in &values {
71            let bin = ((v - min) / bin_width).floor() as usize;
72            let bin = bin.min(n_bins - 1); // Clamp last value
73            counts[bin] += 1;
74        }
75
76        let total = values.len() as f64;
77        let mut x_vals = Vec::with_capacity(n_bins);
78        let mut y_vals = Vec::with_capacity(n_bins);
79        let mut density_vals = Vec::with_capacity(n_bins);
80        let mut xmin_vals = Vec::with_capacity(n_bins);
81        let mut xmax_vals = Vec::with_capacity(n_bins);
82
83        for (i, &count) in counts.iter().enumerate() {
84            let bin_min = min + i as f64 * bin_width;
85            let bin_max = bin_min + bin_width;
86            let center = (bin_min + bin_max) / 2.0;
87
88            x_vals.push(Value::Float(center));
89            y_vals.push(Value::Float(count as f64));
90            density_vals.push(Value::Float(count as f64 / (total * bin_width)));
91            xmin_vals.push(Value::Float(bin_min));
92            xmax_vals.push(Value::Float(bin_max));
93        }
94
95        let mut result = DataFrame::new();
96        result.add_column("x".to_string(), x_vals);
97        result.add_column("y".to_string(), y_vals.clone());
98        // Expose the count under its ggplot stat name for after_stat expressions
99        // (e.g. after_stat_y("count / sum(count)")).
100        result.add_column("count".to_string(), y_vals);
101        result.add_column("density".to_string(), density_vals);
102        result.add_column("xmin".to_string(), xmin_vals);
103        result.add_column("xmax".to_string(), xmax_vals);
104        result
105    }
106
107    fn required_aes(&self) -> Vec<Aesthetic> {
108        vec![Aesthetic::X]
109    }
110
111    fn name(&self) -> &str {
112        "bin"
113    }
114}