Skip to main content

plotkit_core/charts/
histogram.rs

//! Histogram chart builder methods and binning utilities.
//!
//! This module extends [`HistArtist`] with a fluent API for configuring
//! histogram properties, and provides the [`compute_bins`] utility function
//! for computing equal-width bin edges and counts from raw data.
//!
//! Since [`Axes::hist`] returns `Result<&mut HistArtist>`, the builder
//! methods can be chained directly on the value unwrapped with `?`:
//!
//! ```ignore
//! ax.hist(&data, 20)?
//!     .color(Color::TAB_BLUE)
//!     .label("Distribution")
//!     .alpha(0.7)
//!     .density(true);
//! ```
//!
//! The [`compute_bins`] function is typically called internally when
//! constructing a [`HistArtist`], but it is public so that users can
//! pre-compute bin edges and counts for custom workflows.
//!
//! [`Axes::hist`]: crate::axes::Axes::hist
23
24use crate::artist::HistArtist;
25use crate::primitives::Color;
26
27impl HistArtist {
28    /// Sets the bar fill color for every bin in the histogram.
29    ///
30    /// Accepts any [`Color`] value, which can be constructed from RGB
31    /// components, hex strings, or named color constants.
32    ///
33    /// # Arguments
34    ///
35    /// * `color` - The [`Color`] to fill each histogram bar with.
36    ///
37    /// # Examples
38    ///
39    /// ```ignore
40    /// artist.color(Color::TAB_BLUE);
41    /// ```
42    pub fn color(&mut self, color: Color) -> &mut Self {
43        self.color = color;
44        self
45    }
46
47    /// Sets the legend label for this histogram.
48    ///
49    /// When a label is set, the histogram will appear in the legend if one
50    /// is displayed on the axes. Pass an empty string or omit this call to
51    /// exclude the histogram from the legend. Calling this method again
52    /// overwrites any previously set label.
53    ///
54    /// # Arguments
55    ///
56    /// * `label` - A string slice that will be stored as the legend entry.
57    ///
58    /// # Examples
59    ///
60    /// ```ignore
61    /// artist.label("Scores");
62    /// ```
63    pub fn label(&mut self, label: &str) -> &mut Self {
64        self.label = Some(label.to_string());
65        self
66    }
67
68    /// Sets the opacity (0.0 = fully transparent, 1.0 = fully opaque).
69    ///
70    /// The value is clamped to the `[0.0, 1.0]` range. The default opacity
71    /// is determined by the active theme (typically `0.7` for histograms so
72    /// that overlapping distributions remain visible).
73    ///
74    /// # Arguments
75    ///
76    /// * `alpha` - The desired opacity level.
77    ///
78    /// # Examples
79    ///
80    /// ```ignore
81    /// artist.alpha(0.5); // 50% transparent
82    /// ```
83    pub fn alpha(&mut self, alpha: f64) -> &mut Self {
84        self.alpha = alpha.clamp(0.0, 1.0);
85        self
86    }
87
88    /// Controls whether the histogram displays probability density instead
89    /// of raw counts.
90    ///
91    /// When `density` is `true`, the `counts` vector is normalized so that
92    /// the total area under the histogram integrates to 1.0. Each bin's
93    /// value becomes `count / (total * bin_width)`. This is useful for
94    /// comparing distributions with different sample sizes or overlaying a
95    /// probability density function.
96    ///
97    /// When `density` is `false` (the default), the `counts` vector stores
98    /// raw frequency counts.
99    ///
100    /// # Arguments
101    ///
102    /// * `density` - If `true`, normalize the histogram to unit area.
103    ///
104    /// # Examples
105    ///
106    /// ```ignore
107    /// artist.density(true); // show probability density
108    /// ```
109    pub fn density(&mut self, density: bool) -> &mut Self {
110        self.density = density;
111        if density {
112            self.recompute_density();
113        }
114        self
115    }
116
117    /// Normalizes the `counts` vector so that the total area under the
118    /// histogram equals 1.0.
119    ///
120    /// Each bin value is divided by `total_count * bin_width`, where
121    /// `total_count` is the sum of all counts and `bin_width` is the width
122    /// of the corresponding bin. This method is called automatically by
123    /// [`density`](Self::density) when density mode is enabled.
124    fn recompute_density(&mut self) {
125        let total: f64 = self.counts.iter().sum();
126        if total > 0.0 && self.bin_edges.len() > 1 {
127            for (i, count) in self.counts.iter_mut().enumerate() {
128                let bin_width = self.bin_edges[i + 1] - self.bin_edges[i];
129                *count /= total * bin_width;
130            }
131        }
132    }
133}
134
/// Computes equal-width bin edges and counts for a histogram.
///
/// Given a slice of data values and a desired number of bins, this function
/// determines the bin edges and counts the number of data points that fall
/// into each bin. Non-finite values (`NaN`, `+Inf`, `-Inf`) are silently
/// ignored.
///
/// # Bin placement
///
/// Bins are equal-width and span the range `[min, max]` of the finite
/// values, where `min` and `max` are the smallest and largest finite values
/// in `data`. The i-th bin covers the half-open interval
/// `[edge[i], edge[i+1])`, except for the last bin which is closed on both
/// sides `[edge[n-1], edge[n]]` to include the maximum value.
///
/// # Single-value case
///
/// When all finite values are identical (i.e. `max == min`), the range is
/// expanded to `[min - 0.5, max + 0.5]` so that the single value falls
/// within the bin and the histogram has a visible width. The comparison is
/// exact: data with a very small but non-zero spread (for example values
/// around `1e-20`) keeps its own `[min, max]` range instead of being
/// collapsed into the single-value case.
///
/// # Arguments
///
/// * `data` - The raw samples; non-finite entries are skipped.
/// * `num_bins` - The number of equal-width bins to produce.
///
/// # Returns
///
/// A tuple `(edges, counts)` where:
///
/// * `edges` is a `Vec<f64>` of length `num_bins + 1` containing the bin
///   edges in ascending order.
/// * `counts` is a `Vec<f64>` of length `num_bins` containing the number of
///   data points in each bin.
///
/// If `data` contains no finite values or `num_bins` is zero, both vectors
/// are returned empty.
///
/// # Examples
///
/// ```
/// use plotkit_core::charts::histogram::compute_bins;
///
/// let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
/// let (edges, counts) = compute_bins(&data, 5);
///
/// assert_eq!(edges.len(), 6);    // 5 bins + 1
/// assert_eq!(counts.len(), 5);   // one count per bin
///
/// // Every value lands in exactly one bin.
/// let total: f64 = counts.iter().sum();
/// assert_eq!(total, 5.0);
/// ```
pub fn compute_bins(data: &[f64], num_bins: usize) -> (Vec<f64>, Vec<f64>) {
    if num_bins == 0 {
        return (Vec::new(), Vec::new());
    }

    let finite: Vec<f64> = data.iter().copied().filter(|v| v.is_finite()).collect();
    if finite.is_empty() {
        return (Vec::new(), Vec::new());
    }

    // One pass over the finite samples to find both ends of the range.
    let (lo, hi) = finite
        .iter()
        .fold((f64::INFINITY, f64::NEG_INFINITY), |(lo, hi), &v| {
            (lo.min(v), hi.max(v))
        });

    // Only a genuinely degenerate range (all samples equal) is widened. An
    // absolute tolerance such as `f64::EPSILON` would also swallow distinct
    // small-magnitude samples and bin them over a meaningless [-0.5, 0.5].
    let (lo, hi) = if hi > lo {
        (lo, hi)
    } else {
        (lo - 0.5, hi + 0.5)
    };

    let bin_width = (hi - lo) / num_bins as f64;
    let edges: Vec<f64> = (0..=num_bins)
        .map(|i| lo + i as f64 * bin_width)
        .collect();

    let last_bin = num_bins - 1;
    let mut counts = vec![0.0f64; num_bins];
    for &val in &finite {
        // `val >= lo`, so the quotient is non-negative; the float-to-usize
        // cast saturates rather than wrapping on out-of-range values.
        let raw_bin = ((val - lo) / bin_width).floor() as usize;
        // The maximum lands exactly on the right-most edge; clamp it into the
        // final (closed) bin instead of indexing one past the end.
        counts[raw_bin.min(last_bin)] += 1.0;
    }

    (edges, counts)
}
216
217// ---------------------------------------------------------------------------
218// Tests
219// ---------------------------------------------------------------------------
220
#[cfg(test)]
mod tests {
    use super::*;
    use crate::series::Series;

    /// Absolute tolerance used by [`approx_eq`].
    const TOL: f64 = 1e-12;

    /// Reports whether `a` and `b` differ by strictly less than [`TOL`].
    fn approx_eq(a: f64, b: f64) -> bool {
        (a - b).abs() < TOL
    }

    /// Adds up every entry of a counts slice.
    fn sum_of(values: &[f64]) -> f64 {
        values.iter().sum()
    }

    // ----- compute_bins: ordinary inputs ----------------------------------

    #[test]
    fn basic_five_values_five_bins() {
        let (edges, counts) = compute_bins(&[1.0, 2.0, 3.0, 4.0, 5.0], 5);

        assert_eq!(edges.len(), 6);
        assert_eq!(counts.len(), 5);

        // No sample may be dropped or counted twice.
        assert!(approx_eq(sum_of(&counts), 5.0));

        // The outer edges coincide with the data minimum and maximum.
        assert!(approx_eq(edges[0], 1.0));
        assert!(approx_eq(edges[5], 5.0));
    }

    #[test]
    fn all_values_in_one_bin() {
        let samples = [1.0, 1.5, 1.8, 1.9, 2.0];
        let (edges, counts) = compute_bins(&samples, 1);

        assert_eq!(edges.len(), 2);
        assert_eq!(counts.len(), 1);
        assert!(approx_eq(counts[0], 5.0));
        assert!(approx_eq(edges[0], 1.0));
        assert!(approx_eq(edges[1], 2.0));
    }

    #[test]
    fn even_distribution_across_bins() {
        // The integers 0 through 9 spread over 5 bins of width 1.8.
        let samples: Vec<f64> = (0..10u32).map(f64::from).collect();
        let (edges, counts) = compute_bins(&samples, 5);

        assert_eq!(edges.len(), 6);
        assert_eq!(counts.len(), 5);

        // Each of the ten samples is counted exactly once.
        assert!(approx_eq(sum_of(&counts), 10.0));
    }

    // ----- compute_bins: degenerate and boundary inputs -------------------

    #[test]
    fn empty_data_returns_empty() {
        let (edges, counts) = compute_bins(&[], 10);
        assert!(edges.is_empty());
        assert!(counts.is_empty());
    }

    #[test]
    fn zero_bins_returns_empty() {
        let (edges, counts) = compute_bins(&[1.0, 2.0, 3.0], 0);
        assert!(edges.is_empty());
        assert!(counts.is_empty());
    }

    #[test]
    fn all_nan_returns_empty() {
        let samples = [f64::NAN; 3];
        let (edges, counts) = compute_bins(&samples, 5);
        assert!(edges.is_empty());
        assert!(counts.is_empty());
    }

    #[test]
    fn non_finite_values_are_ignored() {
        let samples = [f64::NAN, 1.0, f64::INFINITY, 2.0, f64::NEG_INFINITY, 3.0];
        let (edges, counts) = compute_bins(&samples, 3);

        assert_eq!(edges.len(), 4);
        assert_eq!(counts.len(), 3);

        // Just 1.0, 2.0 and 3.0 are finite, so exactly three samples count.
        assert!(approx_eq(sum_of(&counts), 3.0));
    }

    #[test]
    fn single_value_expands_range() {
        let (edges, counts) = compute_bins(&[5.0, 5.0, 5.0], 2);

        assert_eq!(edges.len(), 3);
        assert_eq!(counts.len(), 2);

        // A degenerate range is widened by 0.5 on each side: [4.5, 5.5].
        assert!(approx_eq(edges[0], 4.5));
        assert!(approx_eq(edges[2], 5.5));

        // The three repeated samples are all still counted.
        assert!(approx_eq(sum_of(&counts), 3.0));
    }

    #[test]
    fn single_data_point_single_bin() {
        let (edges, counts) = compute_bins(&[42.0], 1);

        assert_eq!(edges.len(), 2);
        assert_eq!(counts.len(), 1);
        assert!(approx_eq(edges[0], 41.5));
        assert!(approx_eq(edges[1], 42.5));
        assert!(approx_eq(counts[0], 1.0));
    }

    #[test]
    fn maximum_value_lands_in_last_bin() {
        // The largest sample coincides with the right-most edge and has to be
        // counted in the final bin rather than falling off the end.
        let (_, counts) = compute_bins(&[0.0, 1.0, 2.0, 3.0, 4.0], 4);

        assert!(approx_eq(sum_of(&counts), 5.0));

        // 4.0 is the maximum, so the last bin cannot be empty.
        assert!(counts[3] >= 1.0);
    }

    // ----- compute_bins: structural invariants ----------------------------

    #[test]
    fn edges_are_monotonically_increasing() {
        let samples: Vec<f64> = (0..100u32)
            .map(|step| f64::from(step) * 0.37 - 10.0)
            .collect();
        let (edges, _) = compute_bins(&samples, 15);

        for pair in edges.windows(2) {
            let (left, right) = (pair[0], pair[1]);
            assert!(
                right > left,
                "edges not monotonically increasing: {} >= {}",
                left,
                right
            );
        }
    }

    #[test]
    fn bins_are_equal_width() {
        let (edges, _) = compute_bins(&[0.0, 10.0, 20.0, 30.0, 40.0, 50.0], 5);

        let expected_width = (50.0 - 0.0) / 5.0;
        for width in edges.windows(2).map(|pair| pair[1] - pair[0]) {
            assert!(
                approx_eq(width, expected_width),
                "bin width {} differs from expected {}",
                width,
                expected_width
            );
        }
    }

    #[test]
    fn total_count_equals_finite_data_length() {
        let samples = [
            1.0,
            2.0,
            3.0,
            4.0,
            5.0,
            f64::NAN,
            f64::INFINITY,
            f64::NEG_INFINITY,
        ];
        let (_, counts) = compute_bins(&samples, 3);

        assert!(approx_eq(sum_of(&counts), 5.0));
    }

    #[test]
    fn large_number_of_bins() {
        let samples: Vec<f64> = (0..1000u32).map(f64::from).collect();
        let (edges, counts) = compute_bins(&samples, 500);

        assert_eq!(edges.len(), 501);
        assert_eq!(counts.len(), 500);

        assert!(approx_eq(sum_of(&counts), 1000.0));
    }

    #[test]
    fn negative_values() {
        let (edges, counts) = compute_bins(&[-10.0, -5.0, -3.0, -1.0, 0.0], 2);

        assert_eq!(edges.len(), 3);
        assert_eq!(counts.len(), 2);

        assert!(approx_eq(edges[0], -10.0));
        assert!(approx_eq(edges[2], 0.0));

        assert!(approx_eq(sum_of(&counts), 5.0));
    }

    #[test]
    fn mixed_positive_and_negative() {
        let (edges, counts) = compute_bins(&[-2.0, -1.0, 0.0, 1.0, 2.0], 4);

        assert_eq!(edges.len(), 5);
        assert_eq!(counts.len(), 4);

        assert!(approx_eq(edges[0], -2.0));
        assert!(approx_eq(edges[4], 2.0));

        assert!(approx_eq(sum_of(&counts), 5.0));
    }

    // ----- HistArtist builder methods -------------------------------------

    /// Builds a small three-bin `HistArtist` with two samples per bin.
    fn six_point_hist() -> HistArtist {
        HistArtist {
            data: Series::new(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]),
            bins: 3,
            bin_edges: vec![1.0, 3.0, 5.0, 7.0],
            counts: vec![2.0, 2.0, 2.0],
            color: Color::TAB_BLUE,
            label: None,
            alpha: 1.0,
            density: false,
        }
    }

    #[test]
    fn builder_color() {
        let mut hist = six_point_hist();
        hist.color(Color::TAB_RED);
        assert_eq!(hist.color, Color::TAB_RED);
    }

    #[test]
    fn builder_label() {
        let mut hist = six_point_hist();
        assert!(hist.label.is_none());
        hist.label("Distribution");
        assert_eq!(hist.label.as_deref(), Some("Distribution"));
    }

    #[test]
    fn builder_label_overwrite() {
        let mut hist = six_point_hist();
        hist.label("first");
        hist.label("second");
        assert_eq!(hist.label.as_deref(), Some("second"));
    }

    #[test]
    fn builder_alpha_clamps_to_range() {
        let mut hist = six_point_hist();

        // In-range values pass through; out-of-range values snap to a bound.
        hist.alpha(0.5);
        assert!(approx_eq(hist.alpha, 0.5));

        hist.alpha(-1.0);
        assert!(approx_eq(hist.alpha, 0.0));

        hist.alpha(2.0);
        assert!(approx_eq(hist.alpha, 1.0));
    }

    #[test]
    fn builder_alpha_boundaries() {
        let mut hist = six_point_hist();

        hist.alpha(0.0);
        assert!(approx_eq(hist.alpha, 0.0));

        hist.alpha(1.0);
        assert!(approx_eq(hist.alpha, 1.0));
    }

    #[test]
    fn builder_density_normalizes_counts() {
        let mut hist = six_point_hist();
        // Fixture: counts [2, 2, 2] over edges [1, 3, 5, 7], so the total is
        // 6 and every bin is 2 wide. Each density is 2 / (6 * 2) = 1/6.
        hist.density(true);

        assert!(hist.density);
        let expected = 2.0 / (6.0 * 2.0);
        for c in hist.counts.iter().copied() {
            assert!(
                approx_eq(c, expected),
                "expected density {expected}, got {c}"
            );
        }
    }

    #[test]
    fn builder_density_false_does_not_modify_counts() {
        let mut hist = six_point_hist();
        let counts_before = hist.counts.clone();
        hist.density(false);
        assert!(!hist.density);
        assert_eq!(hist.counts, counts_before);
    }

    #[test]
    fn builder_density_with_zero_total() {
        let mut hist = HistArtist {
            data: Series::new(vec![]),
            bins: 2,
            bin_edges: vec![0.0, 1.0, 2.0],
            counts: vec![0.0, 0.0],
            color: Color::BLACK,
            label: None,
            alpha: 1.0,
            density: false,
        };
        // A zero total must neither panic nor introduce NaN.
        hist.density(true);
        assert!(hist.counts.iter().all(|value| value.is_finite()));
    }

    #[test]
    fn builder_density_area_integrates_to_one() {
        // Start from realistic counts produced by compute_bins.
        let samples: Vec<f64> = (0..100u32).map(|step| f64::from(step) * 0.1).collect();
        let (bin_edges, counts) = compute_bins(&samples, 10);
        let mut hist = HistArtist {
            data: Series::new(samples),
            bins: 10,
            bin_edges,
            counts,
            color: Color::TAB_BLUE,
            label: None,
            alpha: 1.0,
            density: false,
        };

        hist.density(true);

        // Summing density * width over every bin gives the total area.
        let area: f64 = hist
            .counts
            .iter()
            .zip(hist.bin_edges.windows(2))
            .map(|(&height, pair)| height * (pair[1] - pair[0]))
            .sum();
        assert!(
            (area - 1.0).abs() < 1e-10,
            "density area should be 1.0, got {area}"
        );
    }

    #[test]
    fn builder_chaining() {
        let mut hist = six_point_hist();
        hist.color(Color::TAB_GREEN).label("Test").alpha(0.8);

        assert_eq!(hist.color, Color::TAB_GREEN);
        assert_eq!(hist.label.as_deref(), Some("Test"));
        assert!(approx_eq(hist.alpha, 0.8));
    }
}