Skip to main content

esoc_chart/chart/
histogram.rs

1// SPDX-License-Identifier: MIT OR Apache-2.0
2//! Histogram series with automatic binning.
3
4use esoc_gfx::canvas::Canvas;
5use esoc_gfx::color::Color;
6use esoc_gfx::element::{DrawElement, Element};
7use esoc_gfx::geom::Rect;
8use esoc_gfx::layer::Layer;
9use esoc_gfx::style::{Fill, Stroke};
10use esoc_gfx::transform::CoordinateTransform;
11
12use crate::series::{DataBounds, SeriesRenderer};
13use crate::theme::Theme;
14
15/// Binning strategy for histograms.
16#[derive(Clone, Copy, Debug, Default)]
17pub enum BinStrategy {
18    /// Sturges' rule: `ceil(log2(n)) + 1` (default).
19    #[default]
20    Sturges,
21    /// Scott's rule: `3.5 * std / n^(1/3)`.
22    Scott,
23    /// Freedman-Diaconis rule: `2 * IQR / n^(1/3)`.
24    FreedmanDiaconis,
25    /// Fixed number of bins.
26    Fixed(usize),
27}
28
29/// A histogram series that bins data automatically.
30#[derive(Clone, Debug)]
31pub struct HistogramSeries {
32    /// Raw data to bin.
33    pub data: Vec<f64>,
34    /// Optional series label.
35    pub label: Option<String>,
36    /// Override color.
37    pub color: Option<Color>,
38    /// Override bin count (if set, overrides strategy).
39    pub bin_count: Option<usize>,
40    /// Binning strategy.
41    pub strategy: BinStrategy,
42}
43
44impl HistogramSeries {
45    /// Create a new histogram series.
46    pub fn new(data: &[f64]) -> Self {
47        Self {
48            data: data.to_vec(),
49            label: None,
50            color: None,
51            bin_count: None,
52            strategy: BinStrategy::Sturges,
53        }
54    }
55
56    /// Compute bin edges and counts.
57    fn compute_bins(&self) -> (Vec<f64>, Vec<usize>) {
58        if self.data.is_empty() {
59            return (vec![], vec![]);
60        }
61
62        let mut sorted = self.data.clone();
63        sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
64
65        let min = sorted[0];
66        let max = sorted[sorted.len() - 1];
67        let n = sorted.len();
68
69        let num_bins = self.bin_count.unwrap_or_else(|| match self.strategy {
70            BinStrategy::Sturges => ((n as f64).log2().ceil() as usize + 1).max(1),
71            BinStrategy::Scott => {
72                let std_dev = std_deviation(&sorted);
73                if std_dev < 1e-15 {
74                    1
75                } else {
76                    let bin_width = 3.5 * std_dev / (n as f64).cbrt();
77                    ((max - min) / bin_width).ceil() as usize
78                }
79                .max(1)
80            }
81            BinStrategy::FreedmanDiaconis => {
82                let iqr = percentile(&sorted, 75.0) - percentile(&sorted, 25.0);
83                if iqr < 1e-15 {
84                    ((n as f64).log2().ceil() as usize + 1).max(1)
85                } else {
86                    let bin_width = 2.0 * iqr / (n as f64).cbrt();
87                    ((max - min) / bin_width).ceil() as usize
88                }
89                .max(1)
90            }
91            BinStrategy::Fixed(k) => k.max(1),
92        });
93
94        let range = if (max - min).abs() < 1e-15 {
95            1.0
96        } else {
97            max - min
98        };
99        let bin_width = range / num_bins as f64;
100
101        let mut edges = Vec::with_capacity(num_bins + 1);
102        for i in 0..=num_bins {
103            edges.push(min + i as f64 * bin_width);
104        }
105
106        let mut counts = vec![0usize; num_bins];
107        for &v in &sorted {
108            let mut idx = ((v - min) / bin_width) as usize;
109            if idx >= num_bins {
110                idx = num_bins - 1;
111            }
112            counts[idx] += 1;
113        }
114
115        (edges, counts)
116    }
117}
118
119impl SeriesRenderer for HistogramSeries {
120    fn data_bounds(&self) -> DataBounds {
121        let (edges, counts) = self.compute_bins();
122        if edges.is_empty() {
123            return DataBounds::new(0.0, 1.0, 0.0, 1.0);
124        }
125        let x_min = edges[0];
126        let x_max = edges[edges.len() - 1];
127        let y_max = counts.iter().copied().max().unwrap_or(1) as f64;
128        DataBounds::new(x_min, x_max, 0.0, y_max)
129    }
130
131    fn render(
132        &self,
133        canvas: &mut Canvas,
134        transform: &CoordinateTransform,
135        theme: &Theme,
136        series_index: usize,
137    ) {
138        let (edges, counts) = self.compute_bins();
139        if edges.len() < 2 {
140            return;
141        }
142
143        let color = self
144            .color
145            .unwrap_or_else(|| theme.palette.get(series_index));
146
147        for i in 0..counts.len() {
148            let x0 = edges[i];
149            let x1 = edges[i + 1];
150            let h = counts[i] as f64;
151
152            let p_tl = transform.to_pixel(x0, h);
153            let p_br = transform.to_pixel(x1, 0.0);
154            let rx = p_tl.x.min(p_br.x);
155            let ry = p_tl.y.min(p_br.y);
156            let rw = (p_br.x - p_tl.x).abs();
157            let rh = (p_br.y - p_tl.y).abs();
158
159            canvas.add(DrawElement::new(
160                Element::Rect {
161                    rect: Rect::new(rx, ry, rw, rh),
162                    fill: Fill::Solid(color.with_alpha(0.7)),
163                    stroke: Some(Stroke::solid(color, 0.5)),
164                    rx: 0.0,
165                },
166                Layer::Data,
167            ));
168        }
169    }
170
171    fn label(&self) -> Option<&str> {
172        self.label.as_deref()
173    }
174}
175
176fn std_deviation(sorted: &[f64]) -> f64 {
177    let n = sorted.len() as f64;
178    let mean = sorted.iter().sum::<f64>() / n;
179    let variance = sorted.iter().map(|&v| (v - mean).powi(2)).sum::<f64>() / n;
180    variance.sqrt()
181}
182
183fn percentile(sorted: &[f64], p: f64) -> f64 {
184    if sorted.is_empty() {
185        return 0.0;
186    }
187    let idx = (p / 100.0 * (sorted.len() - 1) as f64).clamp(0.0, (sorted.len() - 1) as f64);
188    let lo = idx.floor() as usize;
189    let hi = idx.ceil() as usize;
190    let frac = idx - lo as f64;
191    sorted[lo] * (1.0 - frac) + sorted[hi] * frac
192}
193
194#[cfg(test)]
195mod tests {
196    use super::*;
197
198    #[test]
199    fn test_histogram_bins() {
200        let h = HistogramSeries::new(&[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]);
201        let (edges, counts) = h.compute_bins();
202        assert!(edges.len() >= 2);
203        assert_eq!(counts.iter().sum::<usize>(), 8);
204    }
205
206    #[test]
207    fn test_histogram_empty() {
208        let h = HistogramSeries::new(&[]);
209        let (edges, counts) = h.compute_bins();
210        assert!(edges.is_empty());
211        assert!(counts.is_empty());
212    }
213}