// lowcharts/plot/histogram.rs

1use std::fmt;
2use std::ops::Range;
3
4use yansi::Color::Blue;
5
6use crate::format::{F64Formatter, HorizontalScale};
7use crate::stats::Stats;
8
/// One bin of a histogram: a span of values together with the number of
/// samples recorded in it.
#[derive(Debug)]
struct Bucket {
    /// Span of values this bucket stands for.
    range: Range<f64>,
    /// Number of samples recorded so far.
    count: usize,
}

impl Bucket {
    /// Builds a bucket for `range` whose sample count starts at zero.
    fn new(range: Range<f64>) -> Self {
        Bucket { count: 0, range }
    }

    /// Records one more sample in this bucket.
    fn inc(&mut self) {
        self.count += 1;
    }
}
25
26/// A struct representing the options to build an histogram.
27pub struct Histogram {
28    vec: Vec<Bucket>,
29    step: f64,
30    // Maximum of all bucket counts
31    top: usize,
32    last: usize,
33    stats: Stats,
34    log_scale: bool,
35    precision: Option<usize>, // If None, then human friendly display will be used
36}
37
/// Preferences used to build a `Histogram`.
#[derive(Default)]
pub struct HistogramOptions {
    /// Number of histogram buckets to display (`Histogram::new` caps it to the
    /// length of the input data).
    pub intervals: usize,
    /// When `true`, a logarithmic scale is used for the buckets.
    pub log_scale: bool,
    /// Number of decimals to display.  With `None`, human units are used, with
    /// an heuristic based on the input data for deciding the units and the
    /// decimal places.
    pub precision: Option<usize>,
}
51
52impl Histogram {
53    /// Creates a Histogram from a vector of numerical data.
54    ///
55    /// `options` is a `HistogramOptions` struct with the preferences to create
56    /// histogram.
57    pub fn new(vec: &[f64], mut options: HistogramOptions) -> Self {
58        let mut stats = Stats::new(vec, options.precision);
59        if options.log_scale {
60            stats.min = 0.0; // We will silently discard negative values
61        }
62        options.intervals = options.intervals.clamp(1, vec.len());
63        let mut histogram = Self::new_with_stats(stats, &options);
64        histogram.load(vec);
65        histogram
66    }
67
68    /// Creates a Histogram with no input data.
69    ///
70    /// Parameters are similar to those on the `new` method, but a parameter
71    /// named `stats` is needed to decide how future data (to be injected with
72    /// the load method) will be accommodated.
73    pub fn new_with_stats(stats: Stats, options: &HistogramOptions) -> Self {
74        let step = if options.log_scale {
75            f64::NAN
76        } else {
77            (stats.max - stats.min) / options.intervals as f64
78        };
79        Self {
80            vec: Self::build_buckets(stats.min..stats.max, options),
81            step,
82            top: 0,
83            last: options.intervals - 1,
84            stats,
85            log_scale: options.log_scale,
86            precision: options.precision,
87        }
88    }
89
90    /// Add to the `Histogram` data the values of a slice of numerical data.
91    pub fn load(&mut self, vec: &[f64]) {
92        for x in vec {
93            self.add(*x);
94        }
95    }
96
97    /// Add to the `Histogram` a single piece of numerical data.
98    pub fn add(&mut self, n: f64) {
99        if let Some(slot) = self.find_slot(n) {
100            self.vec[slot].inc();
101            self.top = self.top.max(self.vec[slot].count);
102        }
103    }
104
105    fn find_slot(&self, n: f64) -> Option<usize> {
106        if n < self.stats.min || n > self.stats.max {
107            return None;
108        }
109        if self.log_scale {
110            let mut bucket = None;
111            for i in 0..self.vec.len() {
112                if self.vec[i].range.end >= n {
113                    bucket = Some(i);
114                    break;
115                }
116            }
117            bucket
118        } else {
119            Some((((n - self.stats.min) / self.step) as usize).min(self.last))
120        }
121    }
122
123    fn build_buckets(range: Range<f64>, options: &HistogramOptions) -> Vec<Bucket> {
124        let mut vec = Vec::<Bucket>::with_capacity(options.intervals);
125        if options.log_scale {
126            let first_bucket_size = range.end / (2_f64.powi(options.intervals as i32) - 1.0);
127            let mut lower = 0.0;
128            for i in 0..options.intervals {
129                let upper = lower + 2_f64.powi(i as i32) * first_bucket_size;
130                vec.push(Bucket::new(lower..upper));
131                lower = upper;
132            }
133        } else {
134            let step = (range.end - range.start) / options.intervals as f64;
135            let mut lower = range.start;
136            for _ in 0..options.intervals {
137                vec.push(Bucket::new(lower..lower + step));
138                lower += step;
139            }
140        }
141        vec
142    }
143}
144
145impl fmt::Display for Histogram {
146    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
147        write!(f, "{}", self.stats)?;
148        let formatter = match self.precision {
149            None => F64Formatter::new_with_range(self.stats.min..self.stats.max),
150            Some(n) => F64Formatter::new(n),
151        };
152        let writer = HistWriter {
153            width: f.width().unwrap_or(110),
154            formatter,
155        };
156        writer.write(f, self)
157    }
158}
159
160struct HistWriter {
161    width: usize,
162    formatter: F64Formatter,
163}
164
165impl HistWriter {
166    pub fn write(&self, f: &mut fmt::Formatter, hist: &Histogram) -> fmt::Result {
167        let width_range = self.get_width(hist);
168        let width_count = ((hist.top as f64).log10().ceil() as usize).max(1);
169        let horizontal_scale =
170            HorizontalScale::new(hist.top / self.get_max_bar_len(width_range + width_count));
171        writeln!(f, "{horizontal_scale}")?;
172        for x in &hist.vec {
173            self.write_bucket(f, x, &horizontal_scale, width_range, width_count)?;
174        }
175        Ok(())
176    }
177
178    fn write_bucket(
179        &self,
180        f: &mut fmt::Formatter,
181        bucket: &Bucket,
182        horizontal_scale: &HorizontalScale,
183        width: usize,
184        width_count: usize,
185    ) -> fmt::Result {
186        writeln!(
187            f,
188            "[{range}] [{count}] {bar}",
189            range = Blue.paint(format!(
190                "{:>width$} .. {:>width$}",
191                self.formatter.format(bucket.range.start),
192                self.formatter.format(bucket.range.end),
193                width = width,
194            )),
195            count = horizontal_scale.get_count(bucket.count, width_count),
196            bar = horizontal_scale.get_bar(bucket.count)
197        )
198    }
199
200    fn get_width(&self, hist: &Histogram) -> usize {
201        self.formatter
202            .format(hist.stats.min)
203            .len()
204            .max(self.formatter.format(hist.stats.max).len())
205    }
206
207    fn get_max_bar_len(&self, fixed_width: usize) -> usize {
208        const EXTRA_CHARS: usize = 10;
209        if self.width < fixed_width + EXTRA_CHARS {
210            75
211        } else {
212            self.width - fixed_width - EXTRA_CHARS
213        }
214    }
215}
216
#[cfg(test)]
mod tests {
    use super::*;
    use float_eq::assert_float_eq;
    use yansi::Paint;

    /// Samples loaded by several tests; all of them lie within `-2.0..=14.0`.
    const SAMPLES: [f64; 13] = [
        -1.0, -1.1, 2.0, 2.0, 2.1, -0.9, 11.0, 11.2, 1.9, 1.99, 1.98, 1.97, 1.96,
    ];

    /// Asserts that every fragment shows up in `display`, printing the whole
    /// output when one of them is missing.
    fn assert_contains_all(display: &str, fragments: &[&str]) {
        for fragment in fragments {
            assert!(
                display.contains(*fragment),
                "missing {:?} in:\n{}",
                fragment,
                display
            );
        }
    }

    #[test]
    fn test_buckets() {
        let stats = Stats::new(&[-2.0, 14.0], None);
        let options = HistogramOptions {
            intervals: 8,
            ..Default::default()
        };
        let mut hist = Histogram::new_with_stats(stats, &options);
        hist.load(&SAMPLES);

        assert_eq!(hist.top, 5);
        let bucket = &hist.vec[0];
        assert_eq!(bucket.range, -2.0..0.0);
        assert_eq!(bucket.count, 3);
        let bucket = &hist.vec[1];
        assert_eq!(bucket.count, 5);
        assert_eq!(bucket.range, 0.0..2.0);
    }

    #[test]
    fn test_buckets_bad_stats() {
        let options = HistogramOptions {
            intervals: 6,
            ..Default::default()
        };
        let mut hist = Histogram::new_with_stats(Stats::new(&[-2.0, 4.0], None), &options);
        // Values outside of the stats range must be ignored.
        hist.load(&[-1.0, 2.0, -1.0, 2.0, 10.0, 10.0, 10.0, -10.0]);
        assert_eq!(hist.top, 2);
    }

    #[test]
    fn display_test() {
        let stats = Stats::new(&[-2.0, 14.0], None);
        let options = HistogramOptions {
            intervals: 8,
            precision: Some(3),
            ..Default::default()
        };
        let mut hist = Histogram::new_with_stats(stats, &options);
        hist.load(&SAMPLES);
        Paint::disable();
        let display = format!("{}", hist);
        assert_contains_all(
            &display,
            &[
                "[-2.000 ..  0.000] [3] ∎∎∎\n",
                "[ 0.000 ..  2.000] [5] ∎∎∎∎∎\n",
                "[ 2.000 ..  4.000] [3] ∎∎∎\n",
                "[ 6.000 ..  8.000] [0] \n",
                "[10.000 .. 12.000] [2] ∎∎\n",
            ],
        );
    }

    #[test]
    fn display_test_bad_width() {
        let options = HistogramOptions {
            intervals: 8,
            precision: Some(3),
            ..Default::default()
        };
        let mut hist = Histogram::new_with_stats(Stats::new(&[-2.0, 14.0], None), &options);
        hist.load(&SAMPLES);
        Paint::disable();
        // A width too small to fit anything must not break the output.
        let display = format!("{:2}", hist);
        assert_contains_all(&display, &["[-2.000 ..  0.000] [3] ∎∎∎\n"]);
    }

    #[test]
    fn display_test_human_units() {
        let vector = &[
            -1.0,
            -12000000.0,
            -12000001.0,
            -12000002.0,
            -12000003.0,
            -2000000.0,
            500000.0,
            500000.0,
        ];
        let hist = Histogram::new(
            vector,
            HistogramOptions {
                intervals: 10,
                ..Default::default()
            },
        );
        Paint::disable();
        let display = format!("{}", hist);
        assert_contains_all(
            &display,
            &[
                "[-12.0 M .. -10.4 M] [4] ∎∎∎∎\n",
                "[ -2.6 M ..  -1.1 M] [1] ∎\n",
                "[ -1.1 M ..   0.5 M] [3] ∎∎∎\n",
                "Samples = 8; Min = -12.0 M; Max = 0.5 M",
                "Average = -6.1 M;",
            ],
        );
    }

    #[test]
    fn display_test_log_scale() {
        let hist = Histogram::new(
            &[0.4, 0.4, 0.4, 0.4, 255.0, 0.2, 1.2, 128.0, 126.0, -7.0],
            HistogramOptions {
                intervals: 8,
                log_scale: true,
                ..Default::default()
            },
        );
        Paint::disable();
        let display = format!("{}", hist);
        assert_contains_all(
            &display,
            &[
                "[  0.00 ..   1.00] [5] ∎∎∎∎∎\n",
                "[  1.00 ..   3.00] [1] ∎\n",
                "[  3.00 ..   7.00] [0]",
                "[  7.00 ..  15.00] [0]",
                "[ 15.00 ..  31.00] [0]",
                "[ 31.00 ..  63.00] [0]",
                "[ 63.00 .. 127.00] [1] ∎\n",
                "[127.00 .. 255.00] [2] ∎∎\n",
            ],
        );
    }

    #[test]
    fn build_buckets_log_scale() {
        let options = HistogramOptions {
            intervals: 8,
            log_scale: true,
            ..Default::default()
        };
        let buckets = Histogram::build_buckets(0.0..2.0_f64.powi(8) - 1.0, &options);
        assert_eq!(buckets.len(), 8);
        assert_eq!(buckets[0].range, 0.0..1.0);
        assert_eq!(buckets[1].range, 1.0..3.0);
        assert_eq!(buckets[2].range, 3.0..7.0);
        assert_eq!(buckets[3].range, 7.0..15.0);
        assert_eq!(buckets[4].range, 15.0..31.0);
        assert_eq!(buckets[5].range, 31.0..63.0);
        assert_eq!(buckets[6].range, 63.0..127.0);
        assert_eq!(buckets[7].range, 127.0..255.0);
    }

    #[test]
    fn build_buckets_log_scale_with_math() {
        let options = HistogramOptions {
            intervals: 10,
            log_scale: true,
            ..Default::default()
        };
        let buckets = Histogram::build_buckets(0.0..10000.0, &options);
        assert_eq!(buckets.len(), 10);
        // Every bucket must be twice as wide as the previous one.
        for pair in buckets.windows(2) {
            assert_float_eq!(
                2.0 * (pair[0].range.end - pair[0].range.start),
                pair[1].range.end - pair[1].range.start,
                rmax <= 2.0 * f64::EPSILON
            );
        }
        assert_float_eq!(
            buckets[9].range.end - buckets[0].range.start,
            10000.0,
            rmax <= 2.0 * f64::EPSILON
        );
    }

    #[test]
    fn build_buckets_no_log_scale() {
        let options = HistogramOptions {
            intervals: 7,
            ..Default::default()
        };
        let buckets = Histogram::build_buckets(0.0..700.0, &options);
        assert_eq!(buckets.len(), 7);
        // Check all of the buckets, the last one included.
        for (i, bucket) in buckets.iter().enumerate() {
            let min = (i * 100) as f64;
            let max = ((i + 1) * 100) as f64;
            assert_eq!(bucket.range, min..max);
        }
    }

    #[test]
    fn find_slot_linear() {
        let options = HistogramOptions {
            intervals: 8,
            ..Default::default()
        };
        let hist = Histogram::new_with_stats(Stats::new(&[-12.0, 4.0], None), &options);
        assert_eq!(hist.find_slot(-13.0), None);
        assert_eq!(hist.find_slot(13.0), None);
        assert_eq!(hist.find_slot(-12.0), Some(0));
        assert_eq!(hist.find_slot(-11.0), Some(0));
        assert_eq!(hist.find_slot(-9.0), Some(1));
        assert_eq!(hist.find_slot(4.0), Some(7));
        assert_eq!(hist.find_slot(1.1), Some(6));
    }

    #[test]
    fn find_slot_logarithmic() {
        let hist = Histogram::new(
            // More than 8 values to avoid interval truncation
            &[255.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, -2000.0],
            HistogramOptions {
                intervals: 8,
                log_scale: true,
                ..Default::default()
            },
        );
        assert_eq!(hist.find_slot(-1.0), None);
        assert_eq!(hist.find_slot(0.0), Some(0));
        assert_eq!(hist.find_slot(0.5), Some(0));
        assert_eq!(hist.find_slot(1.5), Some(1));
        assert_eq!(hist.find_slot(8.75), Some(3));
        assert_eq!(hist.find_slot(33.1), Some(5));
        assert_eq!(hist.find_slot(127.1), Some(7));
        assert_eq!(hist.find_slot(247.1), Some(7));
        assert_eq!(hist.find_slot(1000.0), None);
    }
}
434        assert!(hist.find_slot(1000.0) == None);
435    }
436}