plotkit_core/charts/histogram.rs
1//! Histogram chart builder methods and binning utilities.
2//!
3//! This module extends [`HistArtist`] with a fluent API for configuring
4//! histogram properties, and provides the [`compute_bins`] utility function
5//! for computing equal-width bin edges and counts from raw data.
6//!
7//! Since [`Axes::hist`] returns `Result<&mut HistArtist>`, the builder
8//! methods can be chained directly on the return value:
9//!
10//! ```ignore
11//! ax.hist(&data, 20)?
12//! .color(Color::TAB_BLUE)
13//! .label("Distribution")
14//! .alpha(0.7)
15//! .density(true);
16//! ```
17//!
18//! The [`compute_bins`] function is typically called internally when
19//! constructing a [`HistArtist`], but it is public so that users can
20//! pre-compute bin edges and counts for custom workflows.
21//!
22//! [`Axes::hist`]: crate::axes::Axes::hist
23
24use crate::artist::HistArtist;
25use crate::primitives::Color;
26
27impl HistArtist {
28 /// Sets the bar fill color for every bin in the histogram.
29 ///
30 /// Accepts any [`Color`] value, which can be constructed from RGB
31 /// components, hex strings, or named color constants.
32 ///
33 /// # Arguments
34 ///
35 /// * `color` - The [`Color`] to fill each histogram bar with.
36 ///
37 /// # Examples
38 ///
39 /// ```ignore
40 /// artist.color(Color::TAB_BLUE);
41 /// ```
42 pub fn color(&mut self, color: Color) -> &mut Self {
43 self.color = color;
44 self
45 }
46
47 /// Sets the legend label for this histogram.
48 ///
49 /// When a label is set, the histogram will appear in the legend if one
50 /// is displayed on the axes. Pass an empty string or omit this call to
51 /// exclude the histogram from the legend. Calling this method again
52 /// overwrites any previously set label.
53 ///
54 /// # Arguments
55 ///
56 /// * `label` - A string slice that will be stored as the legend entry.
57 ///
58 /// # Examples
59 ///
60 /// ```ignore
61 /// artist.label("Scores");
62 /// ```
63 pub fn label(&mut self, label: &str) -> &mut Self {
64 self.label = Some(label.to_string());
65 self
66 }
67
68 /// Sets the opacity (0.0 = fully transparent, 1.0 = fully opaque).
69 ///
70 /// The value is clamped to the `[0.0, 1.0]` range. The default opacity
71 /// is determined by the active theme (typically `0.7` for histograms so
72 /// that overlapping distributions remain visible).
73 ///
74 /// # Arguments
75 ///
76 /// * `alpha` - The desired opacity level.
77 ///
78 /// # Examples
79 ///
80 /// ```ignore
81 /// artist.alpha(0.5); // 50% transparent
82 /// ```
83 pub fn alpha(&mut self, alpha: f64) -> &mut Self {
84 self.alpha = alpha.clamp(0.0, 1.0);
85 self
86 }
87
88 /// Controls whether the histogram displays probability density instead
89 /// of raw counts.
90 ///
91 /// When `density` is `true`, the `counts` vector is normalized so that
92 /// the total area under the histogram integrates to 1.0. Each bin's
93 /// value becomes `count / (total * bin_width)`. This is useful for
94 /// comparing distributions with different sample sizes or overlaying a
95 /// probability density function.
96 ///
97 /// When `density` is `false` (the default), the `counts` vector stores
98 /// raw frequency counts.
99 ///
100 /// # Arguments
101 ///
102 /// * `density` - If `true`, normalize the histogram to unit area.
103 ///
104 /// # Examples
105 ///
106 /// ```ignore
107 /// artist.density(true); // show probability density
108 /// ```
109 pub fn density(&mut self, density: bool) -> &mut Self {
110 self.density = density;
111 if density {
112 self.recompute_density();
113 }
114 self
115 }
116
117 /// Normalizes the `counts` vector so that the total area under the
118 /// histogram equals 1.0.
119 ///
120 /// Each bin value is divided by `total_count * bin_width`, where
121 /// `total_count` is the sum of all counts and `bin_width` is the width
122 /// of the corresponding bin. This method is called automatically by
123 /// [`density`](Self::density) when density mode is enabled.
124 fn recompute_density(&mut self) {
125 let total: f64 = self.counts.iter().sum();
126 if total > 0.0 && self.bin_edges.len() > 1 {
127 for (i, count) in self.counts.iter_mut().enumerate() {
128 let bin_width = self.bin_edges[i + 1] - self.bin_edges[i];
129 *count /= total * bin_width;
130 }
131 }
132 }
133}
134
/// Computes equal-width bin edges and counts for a histogram.
///
/// Given a slice of data values and a desired number of bins, this function
/// determines the bin edges and counts the number of data points that fall
/// into each bin. Non-finite values (`NaN`, `+Inf`, `-Inf`) are silently
/// ignored.
///
/// # Bin placement
///
/// Bins are equal-width and span the range `[min, max]` of the finite
/// values, where `min` and `max` are the smallest and largest finite values
/// in `data`. The first edge is exactly `min` and the last edge is exactly
/// `max`. The i-th bin covers the half-open interval
/// `[edge[i], edge[i+1])`, except for the last bin which is closed on both
/// sides `[edge[n-1], edge[n]]` to include the maximum value.
///
/// Every value is counted in the bin whose *returned* edges contain it: the
/// index obtained by division is checked against the edges and corrected if
/// floating-point rounding put it one bin off.
///
/// # Single-value case
///
/// When all finite values compare equal (`max == min`), the range is
/// expanded to `[min - 0.5, max + 0.5]` so that the single value falls
/// within a bin and the histogram has a visible width. Distinct values are
/// never treated as a single value, however close together they are.
///
/// # Returns
///
/// A tuple `(edges, counts)` where:
///
/// * `edges` is a `Vec<f64>` of length `num_bins + 1` containing the sorted
///   bin edges.
/// * `counts` is a `Vec<f64>` of length `num_bins` containing the number of
///   data points in each bin.
///
/// If `data` contains no finite values or `num_bins` is zero, both vectors
/// are returned empty.
///
/// # Examples
///
/// ```
/// use plotkit_core::charts::histogram::compute_bins;
///
/// let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
/// let (edges, counts) = compute_bins(&data, 5);
///
/// assert_eq!(edges.len(), 6); // 5 bins + 1
/// assert_eq!(counts.len(), 5); // one count per bin
///
/// // Every value lands in exactly one bin.
/// let total: f64 = counts.iter().sum();
/// assert_eq!(total, 5.0);
/// ```
pub fn compute_bins(data: &[f64], num_bins: usize) -> (Vec<f64>, Vec<f64>) {
    if num_bins == 0 {
        return (Vec::new(), Vec::new());
    }

    // Re-creatable view of the finite samples; scanning `data` twice is
    // cheaper than copying the samples into a temporary vector first.
    let finite = || data.iter().copied().filter(|v| v.is_finite());

    // One pass finds both extremes; no finite sample means no histogram.
    let mut samples = finite();
    let first = match samples.next() {
        Some(v) => v,
        None => return (Vec::new(), Vec::new()),
    };
    let (lo, hi) = samples.fold((first, first), |(lo, hi), v| (lo.min(v), hi.max(v)));

    // Only a truly degenerate range is widened. An absolute-epsilon test here
    // would also swallow legitimately tiny ranges (e.g. data around 1e-20).
    let (lo, hi) = if lo == hi {
        (lo - 0.5, hi + 0.5)
    } else {
        (lo, hi)
    };

    let bin_width = (hi - lo) / num_bins as f64;
    let mut edges: Vec<f64> = (0..=num_bins)
        .map(|i| lo + i as f64 * bin_width)
        .collect();
    // `lo + n * width` may round to a neighbor of `hi`; pin the last edge so
    // the closed final bin really ends at the maximum.
    edges[num_bins] = hi;

    let last = num_bins - 1;
    let mut counts = vec![0.0f64; num_bins];
    for v in finite() {
        // The float-to-usize cast saturates (and maps NaN to 0), so the
        // estimate is always a valid index once clamped to the last bin.
        let mut bin = (((v - lo) / bin_width).floor() as usize).min(last);
        // The division is only an estimate: reconcile it with the actual
        // edges so the documented half-open intervals hold exactly.
        while bin > 0 && v < edges[bin] {
            bin -= 1;
        }
        while bin < last && v >= edges[bin + 1] {
            bin += 1;
        }
        counts[bin] += 1.0;
    }

    (edges, counts)
}
214
215// ---------------------------------------------------------------------------
216// Tests
217// ---------------------------------------------------------------------------
218
#[cfg(test)]
mod tests {
    use super::*;
    use crate::series::Series;

    /// Absolute tolerance for floating-point comparisons.
    const TOL: f64 = 1e-12;

    /// Returns true if `a` and `b` are within `TOL` of each other.
    fn approx_eq(a: f64, b: f64) -> bool {
        (a - b).abs() < TOL
    }

    /// Sum of all bin counts.
    fn total(counts: &[f64]) -> f64 {
        counts.iter().sum()
    }

    /// Asserts the structural contract of `compute_bins`: `num_bins + 1`
    /// edges and `num_bins` counts.
    #[track_caller]
    fn assert_shape(edges: &[f64], counts: &[f64], num_bins: usize) {
        assert_eq!(edges.len(), num_bins + 1);
        assert_eq!(counts.len(), num_bins);
    }

    /// Asserts that `actual` matches `expected` bin by bin (within `TOL`).
    #[track_caller]
    fn assert_counts(actual: &[f64], expected: &[f64]) {
        assert_eq!(actual.len(), expected.len());
        for (i, (&got, &want)) in actual.iter().zip(expected).enumerate() {
            assert!(
                approx_eq(got, want),
                "bin {i}: expected count {want}, got {got}"
            );
        }
    }

    // -----------------------------------------------------------------------
    // compute_bins — basic behavior
    // -----------------------------------------------------------------------

    #[test]
    fn basic_five_values_five_bins() {
        let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let (edges, counts) = compute_bins(&data, 5);

        assert_shape(&edges, &counts, 5);

        // Every data point is counted exactly once.
        assert!(approx_eq(total(&counts), 5.0));

        // The edges span [min, max] of the data.
        assert!(approx_eq(edges[0], 1.0));
        assert!(approx_eq(edges[5], 5.0));
    }

    #[test]
    fn all_values_in_one_bin() {
        let data = vec![1.0, 1.5, 1.8, 1.9, 2.0];
        let (edges, counts) = compute_bins(&data, 1);

        assert_shape(&edges, &counts, 1);
        assert!(approx_eq(counts[0], 5.0));
        assert!(approx_eq(edges[0], 1.0));
        assert!(approx_eq(edges[1], 2.0));
    }

    #[test]
    fn even_distribution_across_bins() {
        // 10 values evenly spaced from 0 to 9, placed into 5 bins.
        let data: Vec<f64> = (0..10).map(|i| i as f64).collect();
        let (edges, counts) = compute_bins(&data, 5);

        assert_shape(&edges, &counts, 5);
        assert!(approx_eq(total(&counts), 10.0));

        // Bins of width 1.8 over 0..=9 hold {0,1}, {2,3}, {4,5}, {6,7}, {8,9}:
        // the distribution really is even, two values per bin.
        assert_counts(&counts, &[2.0, 2.0, 2.0, 2.0, 2.0]);
    }

    // -----------------------------------------------------------------------
    // compute_bins — edge cases
    // -----------------------------------------------------------------------

    #[test]
    fn empty_data_returns_empty() {
        let (edges, counts) = compute_bins(&[], 10);
        assert!(edges.is_empty());
        assert!(counts.is_empty());
    }

    #[test]
    fn zero_bins_returns_empty() {
        let data = vec![1.0, 2.0, 3.0];
        let (edges, counts) = compute_bins(&data, 0);
        assert!(edges.is_empty());
        assert!(counts.is_empty());
    }

    #[test]
    fn all_nan_returns_empty() {
        let data = vec![f64::NAN, f64::NAN, f64::NAN];
        let (edges, counts) = compute_bins(&data, 5);
        assert!(edges.is_empty());
        assert!(counts.is_empty());
    }

    #[test]
    fn non_finite_values_are_ignored() {
        let data = vec![f64::NAN, 1.0, f64::INFINITY, 2.0, f64::NEG_INFINITY, 3.0];
        let (edges, counts) = compute_bins(&data, 3);

        assert_shape(&edges, &counts, 3);

        // Only the three finite values (1.0, 2.0, 3.0) should be counted.
        assert!(approx_eq(total(&counts), 3.0));
    }

    #[test]
    fn single_value_expands_range() {
        let data = vec![5.0, 5.0, 5.0];
        let (edges, counts) = compute_bins(&data, 2);

        assert_shape(&edges, &counts, 2);

        // Range should be expanded to [4.5, 5.5].
        assert!(approx_eq(edges[0], 4.5));
        assert!(approx_eq(edges[2], 5.5));

        // All values should be counted.
        assert!(approx_eq(total(&counts), 3.0));
    }

    #[test]
    fn single_data_point_single_bin() {
        let data = vec![42.0];
        let (edges, counts) = compute_bins(&data, 1);

        assert_shape(&edges, &counts, 1);
        assert!(approx_eq(edges[0], 41.5));
        assert!(approx_eq(edges[1], 42.5));
        assert!(approx_eq(counts[0], 1.0));
    }

    #[test]
    fn maximum_value_lands_in_last_bin() {
        // The maximum value sits exactly on the right edge of the last bin.
        // It must be included in the last bin, not lost.
        let data = vec![0.0, 1.0, 2.0, 3.0, 4.0];
        let (_, counts) = compute_bins(&data, 4);

        assert!(approx_eq(total(&counts), 5.0));

        // Unit-width bins: 0, 1 and 2 each open their own bin, while the last
        // bin is closed on the right and therefore holds both 3.0 and 4.0.
        assert_counts(&counts, &[1.0, 1.0, 1.0, 2.0]);
    }

    // -----------------------------------------------------------------------
    // compute_bins — structural invariants
    // -----------------------------------------------------------------------

    #[test]
    fn edges_are_monotonically_increasing() {
        let data: Vec<f64> = (0..100).map(|i| (i as f64) * 0.37 - 10.0).collect();
        let (edges, _) = compute_bins(&data, 15);

        for pair in edges.windows(2) {
            assert!(
                pair[1] > pair[0],
                "edges not monotonically increasing: {} >= {}",
                pair[0],
                pair[1]
            );
        }
    }

    #[test]
    fn bins_are_equal_width() {
        let data = vec![0.0, 10.0, 20.0, 30.0, 40.0, 50.0];
        let (edges, _) = compute_bins(&data, 5);

        let expected_width = (50.0 - 0.0) / 5.0;
        for pair in edges.windows(2) {
            let width = pair[1] - pair[0];
            assert!(
                approx_eq(width, expected_width),
                "bin width {} differs from expected {}",
                width,
                expected_width
            );
        }
    }

    #[test]
    fn total_count_equals_finite_data_length() {
        let data = vec![
            1.0,
            2.0,
            3.0,
            4.0,
            5.0,
            f64::NAN,
            f64::INFINITY,
            f64::NEG_INFINITY,
        ];
        let (_, counts) = compute_bins(&data, 3);

        assert!(approx_eq(total(&counts), 5.0));
    }

    #[test]
    fn large_number_of_bins() {
        let data: Vec<f64> = (0..1000).map(|i| i as f64).collect();
        let (edges, counts) = compute_bins(&data, 500);

        assert_shape(&edges, &counts, 500);
        assert!(approx_eq(total(&counts), 1000.0));
    }

    #[test]
    fn negative_values() {
        let data = vec![-10.0, -5.0, -3.0, -1.0, 0.0];
        let (edges, counts) = compute_bins(&data, 2);

        assert_shape(&edges, &counts, 2);

        assert!(approx_eq(edges[0], -10.0));
        assert!(approx_eq(edges[2], 0.0));

        assert!(approx_eq(total(&counts), 5.0));
    }

    #[test]
    fn mixed_positive_and_negative() {
        let data = vec![-2.0, -1.0, 0.0, 1.0, 2.0];
        let (edges, counts) = compute_bins(&data, 4);

        assert_shape(&edges, &counts, 4);

        assert!(approx_eq(edges[0], -2.0));
        assert!(approx_eq(edges[4], 2.0));

        assert!(approx_eq(total(&counts), 5.0));
    }

    // -----------------------------------------------------------------------
    // HistArtist builder methods
    // -----------------------------------------------------------------------

    /// Helper: build a minimal `HistArtist` for builder method tests.
    ///
    /// Three bins of width 2.0 holding two samples each, density disabled.
    fn sample_hist() -> HistArtist {
        HistArtist {
            data: Series::new(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]),
            bins: 3,
            bin_edges: vec![1.0, 3.0, 5.0, 7.0],
            counts: vec![2.0, 2.0, 2.0],
            color: Color::TAB_BLUE,
            label: None,
            alpha: 1.0,
            density: false,
        }
    }

    #[test]
    fn builder_color() {
        let mut h = sample_hist();
        h.color(Color::TAB_RED);
        assert_eq!(h.color, Color::TAB_RED);
    }

    #[test]
    fn builder_label() {
        let mut h = sample_hist();
        assert!(h.label.is_none());
        h.label("Distribution");
        assert_eq!(h.label.as_deref(), Some("Distribution"));
    }

    #[test]
    fn builder_label_overwrite() {
        let mut h = sample_hist();
        h.label("first");
        h.label("second");
        assert_eq!(h.label.as_deref(), Some("second"));
    }

    #[test]
    fn builder_alpha_clamps_to_range() {
        let mut h = sample_hist();

        // In-range values are stored as given.
        h.alpha(0.5);
        assert!(approx_eq(h.alpha, 0.5));

        // Out-of-range values are pulled back to the nearest bound.
        h.alpha(-1.0);
        assert!(approx_eq(h.alpha, 0.0));

        h.alpha(2.0);
        assert!(approx_eq(h.alpha, 1.0));
    }

    #[test]
    fn builder_alpha_boundaries() {
        let mut h = sample_hist();

        h.alpha(0.0);
        assert!(approx_eq(h.alpha, 0.0));

        h.alpha(1.0);
        assert!(approx_eq(h.alpha, 1.0));
    }

    #[test]
    fn builder_density_normalizes_counts() {
        let mut h = sample_hist();
        // counts = [2.0, 2.0, 2.0], bin_edges = [1.0, 3.0, 5.0, 7.0]
        // total = 6.0, each bin_width = 2.0
        // density[i] = count[i] / (total * bin_width) = 2.0 / (6.0 * 2.0) = 1/6
        h.density(true);

        assert!(h.density);
        let expected = 2.0 / (6.0 * 2.0);
        for &c in &h.counts {
            assert!(
                approx_eq(c, expected),
                "expected density {expected}, got {c}"
            );
        }
    }

    #[test]
    fn builder_density_false_does_not_modify_counts() {
        let mut h = sample_hist();
        let original_counts = h.counts.clone();
        h.density(false);
        assert!(!h.density);
        assert_eq!(h.counts, original_counts);
    }

    #[test]
    fn builder_density_with_zero_total() {
        let mut h = HistArtist {
            data: Series::new(vec![]),
            bins: 2,
            bin_edges: vec![0.0, 1.0, 2.0],
            counts: vec![0.0, 0.0],
            color: Color::BLACK,
            label: None,
            alpha: 1.0,
            density: false,
        };
        // Should not panic or produce NaN when total is zero.
        h.density(true);
        assert!(h.counts.iter().all(|c| c.is_finite()));
    }

    #[test]
    fn builder_density_area_integrates_to_one() {
        // Use compute_bins to get realistic counts, then enable density.
        let data: Vec<f64> = (0..100).map(|i| i as f64 * 0.1).collect();
        let (edges, counts) = compute_bins(&data, 10);
        let mut h = HistArtist {
            data: Series::new(data),
            bins: 10,
            bin_edges: edges,
            counts,
            color: Color::TAB_BLUE,
            label: None,
            alpha: 1.0,
            density: false,
        };

        h.density(true);

        // The total area (sum of density * bin_width) should be 1.0.
        let area: f64 = h
            .counts
            .iter()
            .zip(h.bin_edges.windows(2))
            .map(|(&d, edge)| d * (edge[1] - edge[0]))
            .sum();
        assert!(
            (area - 1.0).abs() < 1e-10,
            "density area should be 1.0, got {area}"
        );
    }

    #[test]
    fn builder_chaining() {
        let mut h = sample_hist();
        h.color(Color::TAB_GREEN).label("Test").alpha(0.8);

        assert_eq!(h.color, Color::TAB_GREEN);
        assert_eq!(h.label.as_deref(), Some("Test"));
        assert!(approx_eq(h.alpha, 0.8));
    }
}