1use thiserror::Error;
2
3use crate::barplot::{BarplotError, BarplotGraphics, BarplotOptions, barplot};
4use crate::border::BorderType;
5use crate::canvas::Scale;
6use crate::color::{NamedColor, TermColor};
7use crate::math::{ceil_neg_log10, minmax, usize_to_f64};
8use crate::plot::Plot;
9
/// Glyph used for histogram bars by `HistogramOptions::default`:
/// U+2587 (LOWER SEVEN EIGHTHS BLOCK).
const DEFAULT_HISTOGRAM_SYMBOL: char = '\u{2587}';
/// Multipliers that `rounded_bin_width` may snap a raw bin width to, relative
/// to the power of ten at or below that width.
const HISTOGRAM_CLOSE_FACTORS: [f64; 5] = [1.0, 2.0, 2.5, 5.0, 10.0];
12
/// Selects which end of every histogram bin is inclusive.
///
/// The choice affects how a value lying exactly on a bin edge is counted and
/// which brackets are used in the generated bin labels.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum ClosedInterval {
    /// Bins include their lower edge; labels are written as `[a, b)`.
    Left,
    /// Bins include their upper edge; labels are written as `(a, b]`.
    Right,
}
22
/// Settings accepted by [`histogram`].
///
/// `nbins` and `closed` steer the binning. The remaining fields are handed to
/// the [`BarplotOptions`] used to draw the bars (with `xlabel` replaced by a
/// generated label when it is `None`).
#[derive(Debug, Clone)]
#[non_exhaustive]
pub struct HistogramOptions {
    /// Plot title forwarded to the bar plot. Defaults to `None`.
    pub title: Option<String>,
    /// X-axis label. When `None`, [`histogram`] substitutes `"Frequency"`,
    /// suffixed with the scale name (e.g. `"Frequency [log10]"`) for a
    /// non-identity `xscale`.
    pub xlabel: Option<String>,
    /// Y-axis label forwarded to the bar plot. Defaults to `None`.
    pub ylabel: Option<String>,
    /// Border style forwarded to the bar plot. Defaults to `BorderType::Barplot`.
    pub border: BorderType,
    /// Margin forwarded to the bar plot. Defaults to
    /// `Plot::<BarplotGraphics>::DEFAULT_MARGIN`.
    pub margin: u16,
    /// Padding forwarded to the bar plot. Defaults to
    /// `Plot::<BarplotGraphics>::DEFAULT_PADDING`.
    pub padding: u16,
    /// Forwarded to the bar plot's `labels` option. Defaults to `true`.
    // NOTE(review): presumably toggles rendering of the bin labels; confirm
    // against the bar plot implementation.
    pub labels: bool,
    /// Bar color forwarded to the bar plot. Defaults to named green.
    pub color: TermColor,
    /// Width forwarded to the bar plot. Defaults to `40`.
    pub width: usize,
    /// Bar glyph forwarded to the bar plot. Defaults to `Some('\u{2587}')`.
    pub symbol: Option<char>,
    /// Scale forwarded to the bar plot; also selects the generated x-axis
    /// label when `xlabel` is `None`. Defaults to `Scale::Identity`.
    pub xscale: Scale,
    /// Requested number of bins. `None` derives a count from the sample size;
    /// `Some(0)` is rejected with `HistogramError::InvalidBinCount`. The value
    /// only steers the bin width, so the number of bins actually produced can
    /// differ from the request.
    pub nbins: Option<usize>,
    /// Which end of each bin is inclusive. Defaults to `ClosedInterval::Left`.
    pub closed: ClosedInterval,
}
54
55impl Default for HistogramOptions {
56 fn default() -> Self {
57 Self {
58 title: None,
59 xlabel: None,
60 ylabel: None,
61 border: BorderType::Barplot,
62 margin: Plot::<BarplotGraphics>::DEFAULT_MARGIN,
63 padding: Plot::<BarplotGraphics>::DEFAULT_PADDING,
64 labels: true,
65 color: TermColor::Named(NamedColor::Green),
66 width: 40,
67 symbol: Some(DEFAULT_HISTOGRAM_SYMBOL),
68 xscale: Scale::Identity,
69 nbins: None,
70 closed: ClosedInterval::Left,
71 }
72 }
73}
74
/// Errors returned by [`histogram`].
#[derive(Debug, Error, PartialEq)]
#[non_exhaustive]
pub enum HistogramError {
    /// The input slice contained no values.
    #[error("histogram data cannot be empty")]
    EmptyData,
    /// `HistogramOptions::nbins` was `Some(0)`.
    #[error("nbins must be greater than 0")]
    InvalidBinCount,
    /// An input's string form did not parse as an `f64`, or parsed to a
    /// non-finite number. `value` holds that string form.
    #[error("invalid numeric value: {value}")]
    InvalidNumericValue { value: String },
    /// Error propagated unchanged from the underlying bar plot.
    #[error(transparent)]
    Barplot(#[from] BarplotError),
}
92
93pub fn histogram<V: ToString>(
119 data: &[V],
120 options: HistogramOptions,
121) -> Result<Plot<BarplotGraphics>, HistogramError> {
122 if data.is_empty() {
123 return Err(HistogramError::EmptyData);
124 }
125
126 if matches!(options.nbins, Some(0)) {
127 return Err(HistogramError::InvalidBinCount);
128 }
129
130 let values = parse_data(data)?;
131 let (labels, counts) = build_histogram(&values, options.nbins, options.closed);
132
133 let xlabel = options
134 .xlabel
135 .unwrap_or_else(|| transformed_frequency_label(options.xscale));
136
137 let barplot_options = BarplotOptions {
138 title: options.title,
139 xlabel: Some(xlabel),
140 ylabel: options.ylabel,
141 border: options.border,
142 margin: options.margin,
143 padding: options.padding,
144 labels: options.labels,
145 color: options.color,
146 width: options.width,
147 symbol: options.symbol,
148 xscale: options.xscale,
149 };
150
151 barplot(&labels, &counts, barplot_options).map_err(HistogramError::from)
152}
153
154fn parse_data<V: ToString>(data: &[V]) -> Result<Vec<f64>, HistogramError> {
155 data.iter()
156 .map(|value| {
157 let display = value.to_string();
158 let numeric =
159 display
160 .parse::<f64>()
161 .map_err(|_| HistogramError::InvalidNumericValue {
162 value: display.clone(),
163 })?;
164 if !numeric.is_finite() {
165 return Err(HistogramError::InvalidNumericValue { value: display });
166 }
167 Ok(numeric)
168 })
169 .collect()
170}
171
172fn build_histogram(
173 data: &[f64],
174 nbins: Option<usize>,
175 closed: ClosedInterval,
176) -> (Vec<String>, Vec<usize>) {
177 let bins = nbins.unwrap_or_else(|| sturges_bins(data.len()));
178 let (min_value, max_value) = minmax(data);
179 let raw_width = if bins > 1 {
180 (max_value - min_value) / usize_to_f64(bins - 1)
181 } else {
182 max_value - min_value
183 };
184 let mut bin_width = rounded_bin_width(raw_width);
185 if !bin_width.is_finite() || bin_width <= 0.0 {
186 bin_width = 1.0;
187 }
188
189 let lower = round_down_to_step(min_value, bin_width);
190 let mut upper = round_up_to_step(max_value, bin_width);
191 if upper <= lower {
192 upper = lower + bin_width;
193 }
194
195 let mut edges = vec![lower];
196 loop {
197 let next = edges.last().copied().unwrap_or(lower) + bin_width;
198 if next >= upper {
199 break;
200 }
201 edges.push(next);
202 }
203 edges.push(upper);
204
205 let mut counts = vec![0usize; edges.len().saturating_sub(1)];
206 for value in data {
207 let mut index = match closed {
208 ClosedInterval::Left => edges
209 .partition_point(|edge| *edge <= *value)
210 .saturating_sub(1),
211 ClosedInterval::Right => edges
212 .partition_point(|edge| *edge < *value)
213 .saturating_sub(1),
214 };
215 if let Some(last_index) = counts.len().checked_sub(1) {
216 index = index.min(last_index);
217 counts[index] += 1;
218 }
219 }
220
221 let labels = histogram_labels(&edges, bin_width, closed);
222 (labels, counts)
223}
224
225fn histogram_labels(edges: &[f64], bin_width: f64, closed: ClosedInterval) -> Vec<String> {
226 let mut rounded_edges = Vec::with_capacity(edges.len());
227 let mut pad_left = 0usize;
228 let mut pad_right = 0usize;
229
230 for edge in edges {
231 let value = float_round_log10(*edge, bin_width);
232 let text = format_float_like_ruby(value);
233 let (left_width, right_width) = split_widths(&text);
234 pad_left = pad_left.max(left_width);
235 pad_right = pad_right.max(right_width);
236 rounded_edges.push(value);
237 }
238
239 let (left_bracket, right_bracket) = match closed {
240 ClosedInterval::Left => ('[', ')'),
241 ClosedInterval::Right => ('(', ']'),
242 };
243
244 let mut labels = Vec::with_capacity(edges.len().saturating_sub(1));
245 for rounded_pair in rounded_edges.windows(2) {
246 let val1 = rounded_pair[0];
247 let mut val2 = rounded_pair[1];
248 if val2 == 0.0 && val1 < 0.0 && bin_width < 0.01 {
249 val2 = -0.0;
250 }
251 let text1 = format_float_like_ruby(val1);
252 let text2 = format_float_like_ruby(val2);
253 let (left1, right1) = split_widths(&text1);
254 let (left2, right2) = split_widths(&text2);
255
256 let mut label = String::new();
257 label.push(left_bracket);
258 label.push_str(&" ".repeat(pad_left.saturating_sub(left1)));
259 label.push_str(&text1);
260 label.push_str(&" ".repeat(pad_right.saturating_sub(right1)));
261 label.push_str(", ");
262 label.push_str(&" ".repeat(pad_left.saturating_sub(left2)));
263 label.push_str(&text2);
264 label.push_str(&" ".repeat(pad_right.saturating_sub(right2)));
265 label.push(right_bracket);
266 labels.push(label);
267 }
268
269 labels
270}
271
272fn transformed_frequency_label(scale: Scale) -> String {
273 match scale {
274 Scale::Identity => String::from("Frequency"),
275 Scale::Ln => String::from("Frequency [ln]"),
276 Scale::Log2 => String::from("Frequency [log2]"),
277 Scale::Log10 => String::from("Frequency [log10]"),
278 }
279}
280
/// Returns `ceil(log2(sample_size)) + 1`, or `1` when `sample_size <= 1`.
fn sturges_bins(sample_size: usize) -> usize {
    if sample_size <= 1 {
        return 1;
    }

    // For n >= 2, ceil(log2(n)) equals the bit length of n - 1.
    let bit_length = usize::BITS - (sample_size - 1).leading_zeros();
    // `bit_length` is at most `usize::BITS`, so widening it to `usize` is lossless.
    bit_length as usize + 1
}
294
295fn rounded_bin_width(raw_width: f64) -> f64 {
296 if !raw_width.is_finite() || raw_width <= 0.0 {
297 return 1.0;
298 }
299
300 let magnitude = 10f64.powf(raw_width.log10().floor());
301 let scaled = raw_width / magnitude;
302 let factor = HISTOGRAM_CLOSE_FACTORS
303 .iter()
304 .copied()
305 .min_by(|left, right| {
306 let left_delta = (scaled - *left).abs();
307 let right_delta = (scaled - *right).abs();
308 left_delta
309 .partial_cmp(&right_delta)
310 .unwrap_or(std::cmp::Ordering::Equal)
311 })
312 .unwrap_or(1.0);
313 factor * magnitude
314}
315
/// Rounds `value` down to a multiple of `step` (floor of the quotient, times
/// `step`).
fn round_down_to_step(value: f64, step: f64) -> f64 {
    let whole_steps = (value / step).floor();
    whole_steps * step
}
319
/// Rounds `value` up to a multiple of `step` (ceiling of the quotient, times
/// `step`).
fn round_up_to_step(value: f64, step: f64) -> f64 {
    let whole_steps = (value / step).ceil();
    whole_steps * step
}
323
324fn float_round_log10(value: f64, magnitude: f64) -> f64 {
325 if value == 0.0 {
326 return 0.0_f64.copysign(value);
327 }
328
329 let digits = ceil_neg_log10(magnitude) + 1;
330 if value > 0.0 {
331 round_to_digits(value, digits)
332 } else {
333 -round_to_digits(-value, digits)
334 }
335}
336
/// Rounds `value` to `digits` decimal places.
///
/// A negative `digits` rounds to the left of the decimal point instead
/// (`-2` rounds to the nearest hundred).
fn round_to_digits(value: f64, digits: i32) -> f64 {
    if digits < 0 {
        let unit = 10f64.powi(-digits);
        return (value / unit).round() * unit;
    }

    let factor = 10f64.powi(digits);
    (value * factor).round() / factor
}
346
/// Formats `value` so that it always shows a fractional part.
///
/// Zero renders as `"0.0"` or `"-0.0"` depending on its sign. Any other value
/// uses its `Display` form, with `".0"` appended when that form contains
/// neither a decimal point nor an exponent marker.
// NOTE(review): the name suggests this mirrors Ruby's `Float#to_s`; confirm
// against the reference output for very large or very small magnitudes.
fn format_float_like_ruby(value: f64) -> String {
    if value == 0.0 {
        let zero = if value.is_sign_negative() { "-0.0" } else { "0.0" };
        return zero.to_owned();
    }

    let rendered = value.to_string();
    let has_fraction_or_exponent = rendered.chars().any(|c| matches!(c, '.' | 'e' | 'E'));
    if has_fraction_or_exponent {
        rendered
    } else {
        rendered + ".0"
    }
}
361
/// Returns the character counts before and after the first `.` in `text`.
///
/// Without a `.`, the whole text counts as the left part and the right width
/// is `0`.
fn split_widths(text: &str) -> (usize, usize) {
    match text.split_once('.') {
        Some((whole, fraction)) => (whole.chars().count(), fraction.chars().count()),
        None => (text.chars().count(), 0),
    }
}
368
#[cfg(test)]
mod tests {
    use std::fs;

    use super::{ClosedInterval, HistogramError, HistogramOptions, histogram};
    use crate::color::{NamedColor, TermColor};
    use crate::parse_border_type;
    use crate::test_util::{assert_fixture_eq, render_plot_text};

    /// Loads the shared sample fixture, one `f64` per line.
    fn fixture_data() -> Vec<f64> {
        let path = format!(
            "{}/tests/fixtures/data/randn.txt",
            env!("CARGO_MANIFEST_DIR")
        );
        let text = fs::read_to_string(path).expect("randn fixture should load");
        text.lines()
            .map(|line| line.parse::<f64>().expect("line must parse as f64"))
            .collect()
    }

    #[test]
    fn errors_for_unknown_border_name() {
        let err =
            parse_border_type("invalid_border_name").expect_err("unknown border name should fail");
        assert_eq!(
            err,
            crate::BarplotError::UnknownBorderType {
                name: String::from("invalid_border_name")
            }
        );
    }

    #[test]
    fn default_and_nocolor_fixtures() {
        let data = fixture_data();
        let plot = histogram(&data, HistogramOptions::default()).expect("histogram should succeed");
        assert_fixture_eq(
            &render_plot_text(&plot, true),
            "tests/fixtures/histogram/default.txt",
        );
        assert_fixture_eq(
            &render_plot_text(&plot, false),
            "tests/fixtures/histogram/default_nocolor.txt",
        );
    }

    #[test]
    fn scaled_data_fixtures() {
        let data = fixture_data();
        let scaled_up: Vec<f64> = data.iter().map(|value| value * 100.0).collect();
        let scaled_down: Vec<f64> = data.iter().map(|value| value * 0.01).collect();

        let up_plot =
            histogram(&scaled_up, HistogramOptions::default()).expect("histogram should succeed");
        assert_fixture_eq(
            &render_plot_text(&up_plot, true),
            "tests/fixtures/histogram/default_1e2.txt",
        );

        let down_plot =
            histogram(&scaled_down, HistogramOptions::default()).expect("histogram should succeed");
        assert_fixture_eq(
            &render_plot_text(&down_plot, true),
            "tests/fixtures/histogram/default_1e-2.txt",
        );
    }

    #[test]
    fn logscale_and_custom_label_fixtures() {
        let data = fixture_data();

        let log_plot = histogram(
            &data,
            HistogramOptions {
                xscale: crate::canvas::Scale::Log10,
                ..HistogramOptions::default()
            },
        )
        .expect("histogram should succeed");
        assert_fixture_eq(
            &render_plot_text(&log_plot, true),
            "tests/fixtures/histogram/log10.txt",
        );

        let custom_plot = histogram(
            &data,
            HistogramOptions {
                xscale: crate::canvas::Scale::Log10,
                xlabel: Some(String::from("custom label")),
                ..HistogramOptions::default()
            },
        )
        .expect("histogram should succeed");
        assert_fixture_eq(
            &render_plot_text(&custom_plot, true),
            "tests/fixtures/histogram/log10_label.txt",
        );
    }

    #[test]
    fn explicit_bins_and_right_closed_fixture() {
        let data = fixture_data();
        let plot = histogram(
            &data,
            HistogramOptions {
                nbins: Some(5),
                closed: ClosedInterval::Right,
                ..HistogramOptions::default()
            },
        )
        .expect("histogram should succeed");
        assert_fixture_eq(
            &render_plot_text(&plot, true),
            "tests/fixtures/histogram/hist_params.txt",
        );
    }

    #[test]
    fn parameterized_fixtures() {
        let data = fixture_data();

        let parameters1 = histogram(
            &data,
            HistogramOptions {
                title: Some(String::from("My Histogram")),
                xlabel: Some(String::from("Absolute Frequency")),
                color: TermColor::Named(NamedColor::Blue),
                margin: 7,
                padding: 3,
                ..HistogramOptions::default()
            },
        )
        .expect("histogram should succeed");
        assert_fixture_eq(
            &render_plot_text(&parameters1, true),
            "tests/fixtures/histogram/parameters1.txt",
        );

        let parameters1_nolabels = histogram(
            &data,
            HistogramOptions {
                title: Some(String::from("My Histogram")),
                xlabel: Some(String::from("Absolute Frequency")),
                color: TermColor::Named(NamedColor::Blue),
                margin: 7,
                padding: 3,
                labels: false,
                ..HistogramOptions::default()
            },
        )
        .expect("histogram should succeed");
        assert_fixture_eq(
            &render_plot_text(&parameters1_nolabels, true),
            "tests/fixtures/histogram/parameters1_nolabels.txt",
        );

        let parameters2 = histogram(
            &data,
            HistogramOptions {
                title: Some(String::from("My Histogram")),
                xlabel: Some(String::from("Absolute Frequency")),
                color: TermColor::Named(NamedColor::Yellow),
                border: crate::border::BorderType::Solid,
                symbol: Some('='),
                width: 50,
                ..HistogramOptions::default()
            },
        )
        .expect("histogram should succeed");
        assert_fixture_eq(
            &render_plot_text(&parameters2, true),
            "tests/fixtures/histogram/parameters2.txt",
        );
    }

    #[test]
    fn issue_24_regression() {
        let result = histogram(&[1, 2], HistogramOptions::default());
        assert!(result.is_ok(), "histogram([1, 2]) should not error");
    }

    #[test]
    fn validates_inputs() {
        let empty = histogram::<f64>(&[], HistogramOptions::default());
        assert!(matches!(empty, Err(HistogramError::EmptyData)));

        let invalid_bins = histogram(
            &[1.0, 2.0],
            HistogramOptions {
                nbins: Some(0),
                ..HistogramOptions::default()
            },
        );
        assert!(matches!(invalid_bins, Err(HistogramError::InvalidBinCount)));

        let parse_error = histogram(&["abc"], HistogramOptions::default());
        assert!(matches!(
            parse_error,
            Err(HistogramError::InvalidNumericValue { .. })
        ));

        // "NaN" parses successfully as an `f64` but is rejected as non-finite.
        let non_finite = histogram(&["NaN"], HistogramOptions::default());
        assert!(matches!(
            non_finite,
            Err(HistogramError::InvalidNumericValue { .. })
        ));
    }
}