Skip to main content

plotkit_core/charts/
histogram.rs

1//! Histogram chart builder methods and binning utilities.
2//!
3//! This module extends [`HistArtist`] with a fluent API for configuring
4//! histogram properties, and provides the [`compute_bins`] utility function
5//! for computing equal-width bin edges and counts from raw data.
6//!
7//! Since [`Axes::hist`] returns `Result<&mut HistArtist>`, the builder
8//! methods can be chained directly on the return value:
9//!
10//! ```ignore
11//! ax.hist(&data, 20)?
12//!     .color(Color::TAB_BLUE)
13//!     .label("Distribution")
14//!     .alpha(0.7)
15//!     .density(true);
16//! ```
17//!
18//! The [`compute_bins`] function is typically called internally when
19//! constructing a [`HistArtist`], but it is public so that users can
20//! pre-compute bin edges and counts for custom workflows.
21//!
22//! [`Axes::hist`]: crate::axes::Axes::hist
23
24use crate::artist::HistArtist;
25use crate::primitives::Color;
26
27impl HistArtist {
28    /// Sets the bar fill color for every bin in the histogram.
29    ///
30    /// Accepts any [`Color`] value, which can be constructed from RGB
31    /// components, hex strings, or named color constants.
32    ///
33    /// # Arguments
34    ///
35    /// * `color` - The [`Color`] to fill each histogram bar with.
36    ///
37    /// # Examples
38    ///
39    /// ```ignore
40    /// artist.color(Color::TAB_BLUE);
41    /// ```
42    pub fn color(&mut self, color: Color) -> &mut Self {
43        self.color = color;
44        self
45    }
46
47    /// Sets the legend label for this histogram.
48    ///
49    /// When a label is set, the histogram will appear in the legend if one
50    /// is displayed on the axes. Pass an empty string or omit this call to
51    /// exclude the histogram from the legend. Calling this method again
52    /// overwrites any previously set label.
53    ///
54    /// # Arguments
55    ///
56    /// * `label` - A string slice that will be stored as the legend entry.
57    ///
58    /// # Examples
59    ///
60    /// ```ignore
61    /// artist.label("Scores");
62    /// ```
63    pub fn label(&mut self, label: &str) -> &mut Self {
64        self.label = Some(label.to_string());
65        self
66    }
67
68    /// Sets the opacity (0.0 = fully transparent, 1.0 = fully opaque).
69    ///
70    /// The value is clamped to the `[0.0, 1.0]` range. The default opacity
71    /// is determined by the active theme (typically `0.7` for histograms so
72    /// that overlapping distributions remain visible).
73    ///
74    /// # Arguments
75    ///
76    /// * `alpha` - The desired opacity level.
77    ///
78    /// # Examples
79    ///
80    /// ```ignore
81    /// artist.alpha(0.5); // 50% transparent
82    /// ```
83    pub fn alpha(&mut self, alpha: f64) -> &mut Self {
84        self.alpha = alpha.clamp(0.0, 1.0);
85        self
86    }
87
88    /// Controls whether the histogram displays probability density instead
89    /// of raw counts.
90    ///
91    /// When `density` is `true`, the `counts` vector is normalized so that
92    /// the total area under the histogram integrates to 1.0. Each bin's
93    /// value becomes `count / (total * bin_width)`. This is useful for
94    /// comparing distributions with different sample sizes or overlaying a
95    /// probability density function.
96    ///
97    /// When `density` is `false` (the default), the `counts` vector stores
98    /// raw frequency counts.
99    ///
100    /// # Arguments
101    ///
102    /// * `density` - If `true`, normalize the histogram to unit area.
103    ///
104    /// # Examples
105    ///
106    /// ```ignore
107    /// artist.density(true); // show probability density
108    /// ```
109    pub fn density(&mut self, density: bool) -> &mut Self {
110        self.density = density;
111        if density {
112            self.recompute_density();
113        }
114        self
115    }
116
117    /// Normalizes the `counts` vector so that the total area under the
118    /// histogram equals 1.0.
119    ///
120    /// Each bin value is divided by `total_count * bin_width`, where
121    /// `total_count` is the sum of all counts and `bin_width` is the width
122    /// of the corresponding bin. This method is called automatically by
123    /// [`density`](Self::density) when density mode is enabled.
124    fn recompute_density(&mut self) {
125        let total: f64 = self.counts.iter().sum();
126        if total > 0.0 && self.bin_edges.len() > 1 {
127            for (i, count) in self.counts.iter_mut().enumerate() {
128                let bin_width = self.bin_edges[i + 1] - self.bin_edges[i];
129                *count /= total * bin_width;
130            }
131        }
132    }
133}
134
/// Computes equal-width bin edges and counts for a histogram.
///
/// Given a slice of data values and a desired number of bins, this function
/// determines the bin edges and counts the number of data points that fall
/// into each bin. Non-finite values (`NaN`, `+Inf`, `-Inf`) are silently
/// ignored.
///
/// # Bin placement
///
/// Bins are equal-width and span the range `[min, max]` of the finite
/// values, where `min` and `max` are the smallest and largest finite values
/// in `data`. The first edge is exactly `min` and the last edge is exactly
/// `max` (it is stored directly rather than computed as
/// `min + num_bins * width`, which can round to a value just below `max`).
/// The i-th bin covers the half-open interval `[edge[i], edge[i+1])`,
/// except for the last bin which is closed on both sides
/// `[edge[n-1], edge[n]]` to include the maximum value.
///
/// # Single-value case
///
/// When all finite values are identical, or so close that `max - min` is no
/// larger than `f64::EPSILON` times their magnitude, the range is expanded
/// symmetrically so the histogram has a visible width. The expansion is
/// `0.5` on each side, i.e. `[min - 0.5, max + 0.5]`, unless the values are
/// so large that `0.5` would vanish in floating-point rounding; in that case
/// a magnitude-proportional padding is used so that the edges stay distinct.
///
/// The test is relative to the data's magnitude, so small-valued data such
/// as `[1e-20, 3e-20]` keeps its real range.
///
/// # Returns
///
/// A tuple `(edges, counts)` where:
///
/// * `edges` is a `Vec<f64>` of length `num_bins + 1` containing the sorted
///   bin edges.
/// * `counts` is a `Vec<f64>` of length `num_bins` containing the number of
///   data points in each bin.
///
/// If `data` contains no finite values or `num_bins` is zero, both vectors
/// are returned empty.
///
/// # Examples
///
/// ```
/// use plotkit_core::charts::histogram::compute_bins;
///
/// let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
/// let (edges, counts) = compute_bins(&data, 5);
///
/// assert_eq!(edges.len(), 6);    // 5 bins + 1
/// assert_eq!(counts.len(), 5);   // one count per bin
///
/// // Every value lands in exactly one bin.
/// let total: f64 = counts.iter().sum();
/// assert_eq!(total, 5.0);
/// ```
pub fn compute_bins(data: &[f64], num_bins: usize) -> (Vec<f64>, Vec<f64>) {
    if num_bins == 0 {
        return (Vec::new(), Vec::new());
    }

    // Re-creatable iterator over the finite samples, so that the two passes
    // below do not need a temporary copy of the data.
    let finite = || data.iter().copied().filter(|v| v.is_finite());

    let (lo, hi) = finite().fold((f64::INFINITY, f64::NEG_INFINITY), |(lo, hi), v| {
        (lo.min(v), hi.max(v))
    });
    if lo > hi {
        // The fold never saw a value: there is no finite data to bin.
        return (Vec::new(), Vec::new());
    }

    let bins = num_bins as f64;

    // Degenerate range: the values are identical or indistinguishable at the
    // precision available for their magnitude. Pad the range so the bars get
    // a visible width. `0.5` is the normal padding; the magnitude-based term
    // only takes over when `0.5` would be lost to rounding.
    let scale = lo.abs().max(hi.abs());
    let (lo, hi) = if hi - lo <= scale * f64::EPSILON {
        let pad = (scale * f64::EPSILON * bins).max(0.5);
        (lo - pad, hi + pad)
    } else {
        (lo, hi)
    };

    let bin_width = (hi - lo) / bins;

    // All edges but the last are computed from the left; the last one is
    // stored as `hi` itself so that it is exactly the upper end of the range.
    let edges: Vec<f64> = (0..num_bins)
        .map(|i| lo + i as f64 * bin_width)
        .chain(std::iter::once(hi))
        .collect();

    let mut counts = vec![0.0_f64; num_bins];
    let last_bin = num_bins - 1;
    for val in finite() {
        // The float-to-usize cast saturates, and the clamp puts the maximum
        // value (which lands on the right edge) into the final bin.
        let idx = (((val - lo) / bin_width).floor() as usize).min(last_bin);
        counts[idx] += 1.0;
    }

    (edges, counts)
}
214
215// ---------------------------------------------------------------------------
216// Tests
217// ---------------------------------------------------------------------------
218
#[cfg(test)]
mod tests {
    use super::*;
    use crate::series::Series;

    /// Absolute tolerance used by [`approx_eq`].
    const TOL: f64 = 1e-12;

    /// Reports whether `a` and `b` differ by strictly less than [`TOL`].
    fn approx_eq(a: f64, b: f64) -> bool {
        (b - a).abs() < TOL
    }

    /// Adds up all bin values in order.
    fn total_of(counts: &[f64]) -> f64 {
        counts.iter().sum()
    }

    /// Asserts that `edges`/`counts` have the lengths expected for `bins` bins.
    fn assert_shape(edges: &[f64], counts: &[f64], bins: usize) {
        assert_eq!(edges.len(), bins + 1);
        assert_eq!(counts.len(), bins);
    }

    // -----------------------------------------------------------------------
    // compute_bins — basic behavior
    // -----------------------------------------------------------------------

    #[test]
    fn basic_five_values_five_bins() {
        let (edges, counts) = compute_bins(&[1.0, 2.0, 3.0, 4.0, 5.0], 5);
        assert_shape(&edges, &counts, 5);

        // No sample may be lost or counted twice.
        assert!(approx_eq(total_of(&counts), 5.0));

        // The outer edges coincide with the data extremes.
        assert!(approx_eq(edges[0], 1.0));
        assert!(approx_eq(edges[5], 5.0));
    }

    #[test]
    fn all_values_in_one_bin() {
        let (edges, counts) = compute_bins(&[1.0, 1.5, 1.8, 1.9, 2.0], 1);
        assert_shape(&edges, &counts, 1);

        assert!(approx_eq(counts[0], 5.0));
        assert!(approx_eq(edges[0], 1.0));
        assert!(approx_eq(edges[1], 2.0));
    }

    #[test]
    fn even_distribution_across_bins() {
        // Ten integers 0..=9 spread over five bins of width 1.8.
        let samples: Vec<f64> = (0..10).map(f64::from).collect();
        let (edges, counts) = compute_bins(&samples, 5);
        assert_shape(&edges, &counts, 5);

        // Each sample belongs to exactly one bin.
        assert!(approx_eq(total_of(&counts), 10.0));
    }

    // -----------------------------------------------------------------------
    // compute_bins — edge cases
    // -----------------------------------------------------------------------

    #[test]
    fn empty_data_returns_empty() {
        let (edges, counts) = compute_bins(&[], 10);
        assert!(edges.is_empty());
        assert!(counts.is_empty());
    }

    #[test]
    fn zero_bins_returns_empty() {
        let (edges, counts) = compute_bins(&[1.0, 2.0, 3.0], 0);
        assert!(edges.is_empty());
        assert!(counts.is_empty());
    }

    #[test]
    fn all_nan_returns_empty() {
        let (edges, counts) = compute_bins(&[f64::NAN; 3], 5);
        assert!(edges.is_empty());
        assert!(counts.is_empty());
    }

    #[test]
    fn non_finite_values_are_ignored() {
        let samples = [f64::NAN, 1.0, f64::INFINITY, 2.0, f64::NEG_INFINITY, 3.0];
        let (edges, counts) = compute_bins(&samples, 3);
        assert_shape(&edges, &counts, 3);

        // Only 1.0, 2.0 and 3.0 are finite and therefore counted.
        assert!(approx_eq(total_of(&counts), 3.0));
    }

    #[test]
    fn single_value_expands_range() {
        let (edges, counts) = compute_bins(&[5.0; 3], 2);
        assert_shape(&edges, &counts, 2);

        // The zero-width range is widened to [4.5, 5.5].
        assert!(approx_eq(edges[0], 4.5));
        assert!(approx_eq(edges[2], 5.5));

        // Every repeated sample is still counted.
        assert!(approx_eq(total_of(&counts), 3.0));
    }

    #[test]
    fn single_data_point_single_bin() {
        let (edges, counts) = compute_bins(&[42.0], 1);
        assert_shape(&edges, &counts, 1);

        assert!(approx_eq(edges[0], 41.5));
        assert!(approx_eq(edges[1], 42.5));
        assert!(approx_eq(counts[0], 1.0));
    }

    #[test]
    fn maximum_value_lands_in_last_bin() {
        // 4.0 sits exactly on the right edge of the final bin and must be
        // counted there instead of being dropped.
        let (_, counts) = compute_bins(&[0.0, 1.0, 2.0, 3.0, 4.0], 4);

        assert!(approx_eq(total_of(&counts), 5.0));
        assert!(counts[3] >= 1.0);
    }

    // -----------------------------------------------------------------------
    // compute_bins — structural invariants
    // -----------------------------------------------------------------------

    #[test]
    fn edges_are_monotonically_increasing() {
        let samples: Vec<f64> = (0..100).map(|i| (i as f64) * 0.37 - 10.0).collect();
        let (edges, _) = compute_bins(&samples, 15);

        for pair in edges.windows(2) {
            let (left, right) = (pair[0], pair[1]);
            assert!(
                right > left,
                "edges not monotonically increasing: {} >= {}",
                left,
                right
            );
        }
    }

    #[test]
    fn bins_are_equal_width() {
        let (edges, _) = compute_bins(&[0.0, 10.0, 20.0, 30.0, 40.0, 50.0], 5);

        let expected_width = (50.0 - 0.0) / 5.0;
        for pair in edges.windows(2) {
            let width = pair[1] - pair[0];
            assert!(
                approx_eq(width, expected_width),
                "bin width {} differs from expected {}",
                width,
                expected_width
            );
        }
    }

    #[test]
    fn total_count_equals_finite_data_length() {
        let samples = [
            1.0,
            2.0,
            3.0,
            4.0,
            5.0,
            f64::NAN,
            f64::INFINITY,
            f64::NEG_INFINITY,
        ];
        let (_, counts) = compute_bins(&samples, 3);

        assert!(approx_eq(total_of(&counts), 5.0));
    }

    #[test]
    fn large_number_of_bins() {
        let samples: Vec<f64> = (0..1000).map(f64::from).collect();
        let (edges, counts) = compute_bins(&samples, 500);
        assert_shape(&edges, &counts, 500);

        assert!(approx_eq(total_of(&counts), 1000.0));
    }

    #[test]
    fn negative_values() {
        let (edges, counts) = compute_bins(&[-10.0, -5.0, -3.0, -1.0, 0.0], 2);
        assert_shape(&edges, &counts, 2);

        assert!(approx_eq(edges[0], -10.0));
        assert!(approx_eq(edges[2], 0.0));
        assert!(approx_eq(total_of(&counts), 5.0));
    }

    #[test]
    fn mixed_positive_and_negative() {
        let (edges, counts) = compute_bins(&[-2.0, -1.0, 0.0, 1.0, 2.0], 4);
        assert_shape(&edges, &counts, 4);

        assert!(approx_eq(edges[0], -2.0));
        assert!(approx_eq(edges[4], 2.0));
        assert!(approx_eq(total_of(&counts), 5.0));
    }

    // -----------------------------------------------------------------------
    // HistArtist builder methods
    // -----------------------------------------------------------------------

    /// Builds a small three-bin `HistArtist` fixture with raw counts, no
    /// label, full opacity and density mode switched off.
    fn sample_hist() -> HistArtist {
        HistArtist {
            data: Series::new(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]),
            bins: 3,
            bin_edges: vec![1.0, 3.0, 5.0, 7.0],
            counts: vec![2.0, 2.0, 2.0],
            color: Color::TAB_BLUE,
            label: None,
            alpha: 1.0,
            density: false,
        }
    }

    #[test]
    fn builder_color() {
        let mut hist = sample_hist();
        hist.color(Color::TAB_RED);
        assert_eq!(hist.color, Color::TAB_RED);
    }

    #[test]
    fn builder_label() {
        let mut hist = sample_hist();
        assert!(hist.label.is_none());

        hist.label("Distribution");
        assert_eq!(hist.label.as_deref(), Some("Distribution"));
    }

    #[test]
    fn builder_label_overwrite() {
        let mut hist = sample_hist();
        hist.label("first");
        hist.label("second");
        assert_eq!(hist.label.as_deref(), Some("second"));
    }

    #[test]
    fn builder_alpha_clamps_to_range() {
        let mut hist = sample_hist();

        // In-range values pass through; out-of-range values are clamped.
        hist.alpha(0.5);
        assert!(approx_eq(hist.alpha, 0.5));

        hist.alpha(-1.0);
        assert!(approx_eq(hist.alpha, 0.0));

        hist.alpha(2.0);
        assert!(approx_eq(hist.alpha, 1.0));
    }

    #[test]
    fn builder_alpha_boundaries() {
        let mut hist = sample_hist();

        hist.alpha(0.0);
        assert!(approx_eq(hist.alpha, 0.0));

        hist.alpha(1.0);
        assert!(approx_eq(hist.alpha, 1.0));
    }

    #[test]
    fn builder_density_normalizes_counts() {
        // Fixture: counts = [2, 2, 2], edges = [1, 3, 5, 7].
        // total = 6 and every bin is 2 wide, so each density is
        // 2 / (6 * 2) = 1/6.
        let mut hist = sample_hist();
        hist.density(true);
        assert!(hist.density);

        let expected = 2.0 / (6.0 * 2.0);
        for &c in hist.counts.iter() {
            assert!(
                approx_eq(c, expected),
                "expected density {expected}, got {c}"
            );
        }
    }

    #[test]
    fn builder_density_false_does_not_modify_counts() {
        let mut hist = sample_hist();
        let before = hist.counts.clone();

        hist.density(false);

        assert!(!hist.density);
        assert_eq!(hist.counts, before);
    }

    #[test]
    fn builder_density_with_zero_total() {
        let mut hist = HistArtist {
            data: Series::new(vec![]),
            bins: 2,
            bin_edges: vec![0.0, 1.0, 2.0],
            counts: vec![0.0, 0.0],
            color: Color::BLACK,
            label: None,
            alpha: 1.0,
            density: false,
        };

        // A zero total must neither panic nor turn the counts into NaN.
        hist.density(true);
        assert!(hist.counts.iter().all(|c| c.is_finite()));
    }

    #[test]
    fn builder_density_area_integrates_to_one() {
        // Bin realistic data with compute_bins, then switch to density mode.
        let samples: Vec<f64> = (0..100).map(|i| i as f64 * 0.1).collect();
        let (bin_edges, counts) = compute_bins(&samples, 10);
        let mut hist = HistArtist {
            data: Series::new(samples),
            bins: 10,
            bin_edges,
            counts,
            color: Color::TAB_BLUE,
            label: None,
            alpha: 1.0,
            density: false,
        };

        hist.density(true);

        // Sum of density * bin_width over all bins is the area under the bars.
        let area: f64 = hist
            .counts
            .iter()
            .zip(hist.bin_edges.windows(2))
            .map(|(&d, pair)| d * (pair[1] - pair[0]))
            .sum();
        assert!(
            (area - 1.0).abs() < 1e-10,
            "density area should be 1.0, got {area}"
        );
    }

    #[test]
    fn builder_chaining() {
        let mut hist = sample_hist();
        hist.color(Color::TAB_GREEN).label("Test").alpha(0.8);

        assert_eq!(hist.color, Color::TAB_GREEN);
        assert_eq!(hist.label.as_deref(), Some("Test"));
        assert!(approx_eq(hist.alpha, 0.8));
    }
}