Skip to main content

ggplot_rs/stat/
bin.rs

1use crate::aes::Aesthetic;
2use crate::data::{DataFrame, Value};
3use crate::scale::ScaleSet;
4
5use super::Stat;
6
/// ggplot2-style bin alignment: place bins so that an edge falls on `boundary`
/// (mod `width`), then return the left edge of the first bin (`origin`, ≤ `min`
/// up to floating-point rounding) and the number of bins needed to cover
/// `[min, max]` (always at least 1).
///
/// Inputs that cannot be aligned — a `width` that is zero, negative or
/// non-finite, or a non-finite `min`, `max` or `boundary` — yield a single bin
/// starting at `min` instead of a NaN origin or a saturated bin count.
pub(crate) fn aligned_bins_at(min: f64, max: f64, width: f64, boundary: f64) -> (f64, usize) {
    let alignable = width.is_finite()
        && width > 0.0
        && min.is_finite()
        && max.is_finite()
        && boundary.is_finite();
    if !alignable {
        return (min, 1);
    }

    // Whole number of bin widths between `boundary` and `min`, rounded down so
    // the first edge is placed at or to the left of `min`.
    let shift = ((min - boundary) / width).floor();
    let origin = boundary + shift * width;

    // Float-to-integer `as` casts saturate, so a negative quotient
    // (`max < origin`) becomes 0 and is then raised to the one-bin minimum.
    let n = (((max - origin) / width).ceil() as usize).max(1);
    (origin, n)
}
16
17/// 1-D histogram alignment: a *bin* is centered on 0 (`boundary = width/2`),
18/// matching `geom_histogram`.
19fn aligned_bins(min: f64, max: f64, width: f64) -> (f64, usize) {
20    aligned_bins_at(min, max, width, width / 2.0)
21}
22
/// Bins continuous x values into histogram bins.
///
/// The bin layout is driven either by an explicit `binwidth` or, when that is
/// `None`, by the requested number of `bins`.
#[derive(Debug, Clone, PartialEq)]
pub struct StatBin {
    /// Requested number of bins; only consulted when `binwidth` is `None`.
    pub bins: usize,
    /// Explicit bin width; takes precedence over `bins` when set.
    pub binwidth: Option<f64>,
}
28
29impl StatBin {
30    /// Set bin width (overrides bins count).
31    pub fn with_binwidth(mut self, width: f64) -> Self {
32        self.binwidth = Some(width);
33        self
34    }
35
36    /// Set number of bins.
37    pub fn with_bins(mut self, bins: usize) -> Self {
38        self.bins = bins;
39        self.binwidth = None;
40        self
41    }
42}
43
44impl Default for StatBin {
45    fn default() -> Self {
46        StatBin {
47            bins: 30,
48            binwidth: None,
49        }
50    }
51}
52
53impl Stat for StatBin {
54    fn compute_group(&self, data: &DataFrame, _scales: &ScaleSet) -> DataFrame {
55        let x_col = match data.column("x") {
56            Some(c) => c,
57            None => return DataFrame::new(),
58        };
59
60        let values: Vec<f64> = x_col.iter().filter_map(|v| v.as_f64()).collect();
61        if values.is_empty() {
62            return DataFrame::new();
63        }
64
65        let min = values.iter().cloned().fold(f64::INFINITY, f64::min);
66        let max = values.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
67
68        // Handle case where all values are the same
69        let (min, max) = if (max - min).abs() < f64::EPSILON {
70            (min - 0.5, max + 0.5)
71        } else {
72            (min, max)
73        };
74
75        // Match ggplot2's bin_breaks: for a bin count, width spans the range in
76        // `bins - 1` steps; bins are then aligned to `boundary = width/2` so a
77        // bin is centered on 0 (the origin is shifted left of the data min),
78        // rather than starting exactly at the data minimum.
79        let (bin_width, origin, n_bins) = if let Some(bw) = self.binwidth {
80            let (o, n) = aligned_bins(min, max, bw);
81            (bw, o, n)
82        } else if self.bins <= 1 {
83            (max - min, min, 1)
84        } else {
85            let bw = (max - min) / (self.bins - 1) as f64;
86            let (o, n) = aligned_bins(min, max, bw);
87            (bw, o, n)
88        };
89
90        let mut counts = vec![0usize; n_bins];
91
92        for &v in &values {
93            // ggplot2's default bins are right-closed: (a, b]. A point on a
94            // boundary falls in the lower bin.
95            let raw = ((v - origin) / bin_width).ceil() as i64 - 1;
96            let bin = raw.clamp(0, n_bins as i64 - 1) as usize;
97            counts[bin] += 1;
98        }
99
100        let total = values.len() as f64;
101        let mut x_vals = Vec::with_capacity(n_bins);
102        let mut y_vals = Vec::with_capacity(n_bins);
103        let mut density_vals = Vec::with_capacity(n_bins);
104        let mut xmin_vals = Vec::with_capacity(n_bins);
105        let mut xmax_vals = Vec::with_capacity(n_bins);
106
107        for (i, &count) in counts.iter().enumerate() {
108            let bin_min = origin + i as f64 * bin_width;
109            let bin_max = bin_min + bin_width;
110            let center = (bin_min + bin_max) / 2.0;
111
112            x_vals.push(Value::Float(center));
113            y_vals.push(Value::Float(count as f64));
114            density_vals.push(Value::Float(count as f64 / (total * bin_width)));
115            xmin_vals.push(Value::Float(bin_min));
116            xmax_vals.push(Value::Float(bin_max));
117        }
118
119        let mut result = DataFrame::new();
120        result.add_column("x".to_string(), x_vals);
121        result.add_column("y".to_string(), y_vals.clone());
122        // Expose the count under its ggplot stat name for after_stat expressions
123        // (e.g. after_stat_y("count / sum(count)")).
124        result.add_column("count".to_string(), y_vals);
125        result.add_column("density".to_string(), density_vals);
126        result.add_column("xmin".to_string(), xmin_vals);
127        result.add_column("xmax".to_string(), xmax_vals);
128        result
129    }
130
131    fn required_aes(&self) -> Vec<Aesthetic> {
132        vec![Aesthetic::X]
133    }
134
135    fn name(&self) -> &str {
136        "bin"
137    }
138}