plotkit_core/charts/histogram.rs
1//! Histogram chart builder methods and binning utilities.
2//!
3//! This module extends [`HistArtist`] with a fluent API for configuring
4//! histogram properties, and provides the [`compute_bins`] utility function
5//! for computing equal-width bin edges and counts from raw data.
6//!
7//! Since [`Axes::hist`] returns `Result<&mut HistArtist>`, the builder
8//! methods can be chained directly on the return value:
9//!
10//! ```ignore
11//! ax.hist(&data, 20)?
12//! .color(Color::TAB_BLUE)
13//! .label("Distribution")
14//! .alpha(0.7)
15//! .density(true);
16//! ```
17//!
18//! The [`compute_bins`] function is typically called internally when
19//! constructing a [`HistArtist`], but it is public so that users can
20//! pre-compute bin edges and counts for custom workflows.
21//!
22//! [`Axes::hist`]: crate::axes::Axes::hist
23
24use crate::artist::HistArtist;
25use crate::primitives::Color;
26
27impl HistArtist {
28 /// Sets the bar fill color for every bin in the histogram.
29 ///
30 /// Accepts any [`Color`] value, which can be constructed from RGB
31 /// components, hex strings, or named color constants.
32 ///
33 /// # Arguments
34 ///
35 /// * `color` - The [`Color`] to fill each histogram bar with.
36 ///
37 /// # Examples
38 ///
39 /// ```ignore
40 /// artist.color(Color::TAB_BLUE);
41 /// ```
42 pub fn color(&mut self, color: Color) -> &mut Self {
43 self.color = color;
44 self
45 }
46
47 /// Sets the legend label for this histogram.
48 ///
49 /// When a label is set, the histogram will appear in the legend if one
50 /// is displayed on the axes. Pass an empty string or omit this call to
51 /// exclude the histogram from the legend. Calling this method again
52 /// overwrites any previously set label.
53 ///
54 /// # Arguments
55 ///
56 /// * `label` - A string slice that will be stored as the legend entry.
57 ///
58 /// # Examples
59 ///
60 /// ```ignore
61 /// artist.label("Scores");
62 /// ```
63 pub fn label(&mut self, label: &str) -> &mut Self {
64 self.label = Some(label.to_string());
65 self
66 }
67
68 /// Sets the opacity (0.0 = fully transparent, 1.0 = fully opaque).
69 ///
70 /// The value is clamped to the `[0.0, 1.0]` range. The default opacity
71 /// is determined by the active theme (typically `0.7` for histograms so
72 /// that overlapping distributions remain visible).
73 ///
74 /// # Arguments
75 ///
76 /// * `alpha` - The desired opacity level.
77 ///
78 /// # Examples
79 ///
80 /// ```ignore
81 /// artist.alpha(0.5); // 50% transparent
82 /// ```
83 pub fn alpha(&mut self, alpha: f64) -> &mut Self {
84 self.alpha = alpha.clamp(0.0, 1.0);
85 self
86 }
87
88 /// Controls whether the histogram displays probability density instead
89 /// of raw counts.
90 ///
91 /// When `density` is `true`, the `counts` vector is normalized so that
92 /// the total area under the histogram integrates to 1.0. Each bin's
93 /// value becomes `count / (total * bin_width)`. This is useful for
94 /// comparing distributions with different sample sizes or overlaying a
95 /// probability density function.
96 ///
97 /// When `density` is `false` (the default), the `counts` vector stores
98 /// raw frequency counts.
99 ///
100 /// # Arguments
101 ///
102 /// * `density` - If `true`, normalize the histogram to unit area.
103 ///
104 /// # Examples
105 ///
106 /// ```ignore
107 /// artist.density(true); // show probability density
108 /// ```
109 pub fn density(&mut self, density: bool) -> &mut Self {
110 self.density = density;
111 if density {
112 self.recompute_density();
113 }
114 self
115 }
116
117 /// Normalizes the `counts` vector so that the total area under the
118 /// histogram equals 1.0.
119 ///
120 /// Each bin value is divided by `total_count * bin_width`, where
121 /// `total_count` is the sum of all counts and `bin_width` is the width
122 /// of the corresponding bin. This method is called automatically by
123 /// [`density`](Self::density) when density mode is enabled.
124 fn recompute_density(&mut self) {
125 let total: f64 = self.counts.iter().sum();
126 if total > 0.0 && self.bin_edges.len() > 1 {
127 for (i, count) in self.counts.iter_mut().enumerate() {
128 let bin_width = self.bin_edges[i + 1] - self.bin_edges[i];
129 *count /= total * bin_width;
130 }
131 }
132 }
133}
134
/// Computes equal-width bin edges and counts for a histogram.
///
/// Given a slice of data values and a desired number of bins, this function
/// determines the bin edges and counts the number of data points that fall
/// into each bin. Non-finite values (`NaN`, `+Inf`, `-Inf`) are silently
/// ignored.
///
/// # Bin placement
///
/// Bins are equal-width and span the range `[min, max]` of the finite
/// values, where `min` and `max` are the smallest and largest finite values
/// in `data`. The i-th bin covers the half-open interval
/// `[edge[i], edge[i+1])`, except for the last bin which is closed on both
/// sides `[edge[n-1], edge[n]]` to include the maximum value.
///
/// Bin membership is always decided against the returned `edges`
/// themselves: the index obtained by division is only a first guess and is
/// corrected whenever floating-point rounding puts it on the wrong side of
/// an edge. A value that compares equal to `edge[i]` is therefore counted
/// in bin `i`, never in bin `i - 1`.
///
/// # Single-value case
///
/// When all finite values are identical (i.e. `max == min`), the range is
/// expanded to `[min - 0.5, max + 0.5]` so that the single value falls
/// within the bin and the histogram has a visible width. The comparison is
/// exact: distinct values are never merged, however close together they
/// are.
///
/// # Returns
///
/// A tuple `(edges, counts)` where:
///
/// * `edges` is a `Vec<f64>` of length `num_bins + 1` containing the sorted
///   bin edges.
/// * `counts` is a `Vec<f64>` of length `num_bins` containing the number of
///   data points in each bin.
///
/// If `data` contains no finite values or `num_bins` is zero, both vectors
/// are returned empty.
///
/// # Examples
///
/// ```
/// use plotkit_core::charts::histogram::compute_bins;
///
/// let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
/// let (edges, counts) = compute_bins(&data, 5);
///
/// assert_eq!(edges.len(), 6); // 5 bins + 1
/// assert_eq!(counts.len(), 5); // one count per bin
///
/// // Every value lands in exactly one bin.
/// let total: f64 = counts.iter().sum();
/// assert_eq!(total, 5.0);
/// ```
pub fn compute_bins(data: &[f64], num_bins: usize) -> (Vec<f64>, Vec<f64>) {
    if num_bins == 0 {
        return (Vec::new(), Vec::new());
    }

    let finite: Vec<f64> = data.iter().copied().filter(|v| v.is_finite()).collect();
    if finite.is_empty() {
        return (Vec::new(), Vec::new());
    }

    // One pass over the finite values yields both extremes.
    let (lo, hi) = finite.iter().fold(
        (f64::INFINITY, f64::NEG_INFINITY),
        |(lo, hi), &v| (lo.min(v), hi.max(v)),
    );

    // Exact comparison on purpose: an absolute tolerance such as
    // `f64::EPSILON` would merge legitimately distinct small-magnitude
    // values (e.g. 0.0 and 1e-16) into the single-value case.
    let (lo, hi) = if lo == hi {
        (lo - 0.5, hi + 0.5)
    } else {
        (lo, hi)
    };

    let bin_width = (hi - lo) / num_bins as f64;
    let edges: Vec<f64> = (0..=num_bins)
        .map(|i| lo + i as f64 * bin_width)
        .collect();

    let last = num_bins - 1;
    let mut counts = vec![0.0_f64; num_bins];
    for &val in &finite {
        // First guess by division. A float-to-usize `as` cast truncates
        // toward zero, saturates at the integer bounds and maps NaN to 0,
        // so the guess is always a usable index once clamped to `last`
        // (the clamp also puts the maximum value into the final bin).
        let mut bin = (((val - lo) / bin_width) as usize).min(last);

        // The quotient can be off by a bin for values within rounding
        // distance of an edge; step until the guess agrees with the edges
        // that are actually returned to the caller.
        while bin > 0 && val < edges[bin] {
            bin -= 1;
        }
        while bin < last && val >= edges[bin + 1] {
            bin += 1;
        }

        counts[bin] += 1.0;
    }

    (edges, counts)
}
216
217// ---------------------------------------------------------------------------
218// Tests
219// ---------------------------------------------------------------------------
220
#[cfg(test)]
mod tests {
    use super::*;
    use crate::series::Series;

    /// Largest absolute difference at which two floats still count as equal.
    const TOL: f64 = 1e-12;

    /// Reports whether `a` and `b` differ by strictly less than [`TOL`].
    fn approx_eq(a: f64, b: f64) -> bool {
        let gap = (a - b).abs();
        gap < TOL
    }

    /// Adds up every entry of a counts vector.
    fn total_of(counts: &[f64]) -> f64 {
        counts.iter().sum()
    }

    // =======================================================================
    // compute_bins: ordinary inputs
    // =======================================================================

    #[test]
    fn basic_five_values_five_bins() {
        let (edges, counts) = compute_bins(&[1.0, 2.0, 3.0, 4.0, 5.0], 5);

        assert_eq!(edges.len(), 6);
        assert_eq!(counts.len(), 5);

        // No data point may be dropped or double-counted.
        assert!(approx_eq(total_of(&counts), 5.0));

        // The outermost edges coincide with the data extremes.
        assert!(approx_eq(edges[0], 1.0));
        assert!(approx_eq(edges[5], 5.0));
    }

    #[test]
    fn all_values_in_one_bin() {
        let (edges, counts) = compute_bins(&[1.0, 1.5, 1.8, 1.9, 2.0], 1);

        assert_eq!(edges.len(), 2);
        assert_eq!(counts.len(), 1);
        assert!(approx_eq(counts[0], 5.0));
        assert!(approx_eq(edges[0], 1.0));
        assert!(approx_eq(edges[1], 2.0));
    }

    #[test]
    fn even_distribution_across_bins() {
        // Ten integers 0..=9 spread over five bins of width 1.8.
        let samples: Vec<f64> = (0..10_i32).map(f64::from).collect();
        let (edges, counts) = compute_bins(&samples, 5);

        assert_eq!(edges.len(), 6);
        assert_eq!(counts.len(), 5);

        // Each sample must be counted exactly once.
        assert!(approx_eq(total_of(&counts), 10.0));
    }

    // =======================================================================
    // compute_bins: edge cases
    // =======================================================================

    #[test]
    fn empty_data_returns_empty() {
        let (edges, counts) = compute_bins(&[], 10);
        assert!(edges.is_empty());
        assert!(counts.is_empty());
    }

    #[test]
    fn zero_bins_returns_empty() {
        let (edges, counts) = compute_bins(&[1.0, 2.0, 3.0], 0);
        assert!(edges.is_empty());
        assert!(counts.is_empty());
    }

    #[test]
    fn all_nan_returns_empty() {
        let (edges, counts) = compute_bins(&[f64::NAN; 3], 5);
        assert!(edges.is_empty());
        assert!(counts.is_empty());
    }

    #[test]
    fn non_finite_values_are_ignored() {
        let samples = [f64::NAN, 1.0, f64::INFINITY, 2.0, f64::NEG_INFINITY, 3.0];
        let (edges, counts) = compute_bins(&samples, 3);

        assert_eq!(edges.len(), 4);
        assert_eq!(counts.len(), 3);

        // Only 1.0, 2.0 and 3.0 are finite, so only they are counted.
        assert!(approx_eq(total_of(&counts), 3.0));
    }

    #[test]
    fn single_value_expands_range() {
        let (edges, counts) = compute_bins(&[5.0; 3], 2);

        assert_eq!(edges.len(), 3);
        assert_eq!(counts.len(), 2);

        // A zero-width range is widened to [4.5, 5.5].
        assert!(approx_eq(edges[0], 4.5));
        assert!(approx_eq(edges[2], 5.5));

        // The three identical samples are all still counted.
        assert!(approx_eq(total_of(&counts), 3.0));
    }

    #[test]
    fn single_data_point_single_bin() {
        let (edges, counts) = compute_bins(&[42.0], 1);

        assert_eq!(edges.len(), 2);
        assert_eq!(counts.len(), 1);
        assert!(approx_eq(edges[0], 41.5));
        assert!(approx_eq(edges[1], 42.5));
        assert!(approx_eq(counts[0], 1.0));
    }

    #[test]
    fn maximum_value_lands_in_last_bin() {
        // The maximum sits exactly on the right-most edge and must be kept
        // in the final bin rather than falling off the end.
        let (_, counts) = compute_bins(&[0.0, 1.0, 2.0, 3.0, 4.0], 4);

        assert!(approx_eq(total_of(&counts), 5.0));

        // 4.0 is the maximum, so the last bin cannot be empty.
        assert!(counts[3] >= 1.0);
    }

    // =======================================================================
    // compute_bins: structural invariants
    // =======================================================================

    #[test]
    fn edges_are_monotonically_increasing() {
        let samples: Vec<f64> = (0..100_i32)
            .map(|step| f64::from(step) * 0.37 - 10.0)
            .collect();
        let (edges, _) = compute_bins(&samples, 15);

        for pair in edges.windows(2) {
            let (left, right) = (pair[0], pair[1]);
            assert!(
                right > left,
                "edges not monotonically increasing: {} >= {}",
                left,
                right
            );
        }
    }

    #[test]
    fn bins_are_equal_width() {
        let (edges, _) = compute_bins(&[0.0, 10.0, 20.0, 30.0, 40.0, 50.0], 5);

        let expected_width = (50.0 - 0.0) / 5.0;
        for pair in edges.windows(2) {
            let width = pair[1] - pair[0];
            assert!(
                approx_eq(width, expected_width),
                "bin width {} differs from expected {}",
                width,
                expected_width
            );
        }
    }

    #[test]
    fn total_count_equals_finite_data_length() {
        let samples = [
            1.0,
            2.0,
            3.0,
            4.0,
            5.0,
            f64::NAN,
            f64::INFINITY,
            f64::NEG_INFINITY,
        ];
        let (_, counts) = compute_bins(&samples, 3);

        assert!(approx_eq(total_of(&counts), 5.0));
    }

    #[test]
    fn large_number_of_bins() {
        let samples: Vec<f64> = (0..1000_i32).map(f64::from).collect();
        let (edges, counts) = compute_bins(&samples, 500);

        assert_eq!(edges.len(), 501);
        assert_eq!(counts.len(), 500);
        assert!(approx_eq(total_of(&counts), 1000.0));
    }

    #[test]
    fn negative_values() {
        let (edges, counts) = compute_bins(&[-10.0, -5.0, -3.0, -1.0, 0.0], 2);

        assert_eq!(edges.len(), 3);
        assert_eq!(counts.len(), 2);

        assert!(approx_eq(edges[0], -10.0));
        assert!(approx_eq(edges[2], 0.0));

        assert!(approx_eq(total_of(&counts), 5.0));
    }

    #[test]
    fn mixed_positive_and_negative() {
        let (edges, counts) = compute_bins(&[-2.0, -1.0, 0.0, 1.0, 2.0], 4);

        assert_eq!(edges.len(), 5);
        assert_eq!(counts.len(), 4);

        assert!(approx_eq(edges[0], -2.0));
        assert!(approx_eq(edges[4], 2.0));

        assert!(approx_eq(total_of(&counts), 5.0));
    }

    // =======================================================================
    // HistArtist builder methods
    // =======================================================================

    /// Builds a small three-bin `HistArtist` fixture with raw counts.
    fn sample_hist() -> HistArtist {
        HistArtist {
            data: Series::new(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]),
            bins: 3,
            bin_edges: vec![1.0, 3.0, 5.0, 7.0],
            counts: vec![2.0, 2.0, 2.0],
            color: Color::TAB_BLUE,
            label: None,
            alpha: 1.0,
            density: false,
        }
    }

    #[test]
    fn builder_color() {
        let mut hist = sample_hist();
        hist.color(Color::TAB_RED);
        assert_eq!(hist.color, Color::TAB_RED);
    }

    #[test]
    fn builder_label() {
        let mut hist = sample_hist();
        assert!(hist.label.is_none());

        hist.label("Distribution");
        assert_eq!(hist.label.as_deref(), Some("Distribution"));
    }

    #[test]
    fn builder_label_overwrite() {
        let mut hist = sample_hist();
        for text in ["first", "second"] {
            hist.label(text);
        }
        assert_eq!(hist.label.as_deref(), Some("second"));
    }

    #[test]
    fn builder_alpha_clamps_to_range() {
        let mut hist = sample_hist();

        // (requested, stored): in range, below range, above range.
        for (requested, stored) in [(0.5, 0.5), (-1.0, 0.0), (2.0, 1.0)] {
            hist.alpha(requested);
            assert!(approx_eq(hist.alpha, stored));
        }
    }

    #[test]
    fn builder_alpha_boundaries() {
        let mut hist = sample_hist();

        for bound in [0.0, 1.0] {
            hist.alpha(bound);
            assert!(approx_eq(hist.alpha, bound));
        }
    }

    #[test]
    fn builder_density_normalizes_counts() {
        // Fixture: counts = [2, 2, 2] over edges [1, 3, 5, 7].
        // total = 6 and every bin is 2 wide, so each density is
        // 2 / (6 * 2) = 1/6.
        let mut hist = sample_hist();
        hist.density(true);
        assert!(hist.density);

        let expected = 2.0 / (6.0 * 2.0);
        hist.counts.iter().for_each(|&c| {
            assert!(
                approx_eq(c, expected),
                "expected density {expected}, got {c}"
            );
        });
    }

    #[test]
    fn builder_density_false_does_not_modify_counts() {
        let mut hist = sample_hist();
        let before = hist.counts.clone();

        hist.density(false);

        assert!(!hist.density);
        assert_eq!(hist.counts, before);
    }

    #[test]
    fn builder_density_with_zero_total() {
        let mut hist = HistArtist {
            data: Series::new(vec![]),
            bins: 2,
            bin_edges: vec![0.0, 1.0, 2.0],
            counts: vec![0.0, 0.0],
            color: Color::BLACK,
            label: None,
            alpha: 1.0,
            density: false,
        };

        // A zero total must neither panic nor turn the counts into NaN.
        hist.density(true);
        assert!(hist.counts.iter().all(|c| c.is_finite()));
    }

    #[test]
    fn builder_density_area_integrates_to_one() {
        // Realistic counts from compute_bins, then switch density mode on.
        let samples: Vec<f64> = (0..100_i32).map(|step| f64::from(step) * 0.1).collect();
        let (bin_edges, counts) = compute_bins(&samples, 10);
        let mut hist = HistArtist {
            data: Series::new(samples),
            bins: 10,
            bin_edges,
            counts,
            color: Color::TAB_BLUE,
            label: None,
            alpha: 1.0,
            density: false,
        };

        hist.density(true);

        // Area = sum over bins of density * bin width; it must come to 1.0.
        let area: f64 = hist
            .counts
            .iter()
            .zip(hist.bin_edges.windows(2))
            .map(|(&d, pair)| d * (pair[1] - pair[0]))
            .sum();
        assert!(
            (area - 1.0).abs() < 1e-10,
            "density area should be 1.0, got {area}"
        );
    }

    #[test]
    fn builder_chaining() {
        let mut hist = sample_hist();
        hist.color(Color::TAB_GREEN).label("Test").alpha(0.8);

        assert_eq!(hist.color, Color::TAB_GREEN);
        assert_eq!(hist.label.as_deref(), Some("Test"));
        assert!(approx_eq(hist.alpha, 0.8));
    }
}