use thiserror::Error;
use crate::barplot::{BarplotError, BarplotGraphics, BarplotOptions, barplot};
use crate::border::BorderType;
use crate::canvas::Scale;
use crate::color::{NamedColor, TermColor};
use crate::math::{ceil_neg_log10, minmax, usize_to_f64};
use crate::plot::Plot;
/// Default glyph used to draw histogram bars: U+2587 (LOWER SEVEN EIGHTHS BLOCK).
const DEFAULT_HISTOGRAM_SYMBOL: char = '\u{2587}';
/// Candidate mantissas for a "nice" bin width. `rounded_bin_width` picks the entry closest to
/// the raw width's mantissa and multiplies it by the raw width's power of ten.
const HISTOGRAM_CLOSE_FACTORS: [f64; 5] = [1.0, 2.0, 2.5, 5.0, 10.0];
/// Selects which side of every histogram bin is closed (inclusive).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum ClosedInterval {
/// Bins are labelled `[a, b)`: a value equal to an interior edge is counted in the bin that
/// starts at that edge.
Left,
/// Bins are labelled `(a, b]`: a value equal to an interior edge is counted in the bin that
/// ends at that edge.
Right,
}
/// Configuration for [`histogram`].
///
/// Every field except `nbins` and `closed` is forwarded unchanged to the underlying bar plot
/// options; `nbins` and `closed` control how the data is binned.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub struct HistogramOptions {
/// Optional plot title, forwarded to the bar plot.
pub title: Option<String>,
/// Optional x-axis label. When `None`, `histogram` substitutes `"Frequency"`, with the scale
/// name appended in brackets for non-identity scales.
pub xlabel: Option<String>,
/// Optional y-axis label, forwarded to the bar plot.
pub ylabel: Option<String>,
/// Border style, forwarded to the bar plot.
pub border: BorderType,
/// Forwarded to the bar plot's `margin` option (presumably the left margin in columns;
/// confirm against the plot module).
pub margin: u16,
/// Forwarded to the bar plot's `padding` option (presumably the gap between labels and the
/// plot area; confirm against the plot module).
pub padding: u16,
/// Forwarded to the bar plot's `labels` option (presumably toggles label rendering; confirm
/// against the bar plot module).
pub labels: bool,
/// Color forwarded to the bar plot.
pub color: TermColor,
/// Width forwarded to the bar plot.
pub width: usize,
/// Optional bar glyph, forwarded to the bar plot.
pub symbol: Option<char>,
/// Scale forwarded to the bar plot; it also selects the default x-axis label.
pub xscale: Scale,
/// Requested number of bins. `None` derives the count from the number of samples;
/// `Some(0)` is rejected. The rendered number of bins can differ from the request because
/// the bin width is rounded to a "nice" value.
pub nbins: Option<usize>,
/// Which side of each bin interval is inclusive.
pub closed: ClosedInterval,
}
impl Default for HistogramOptions {
/// Returns the default configuration: no title or axis labels, the bar plot border, the
/// plot type's default margin and padding, labels enabled, green bars, width 40, the default
/// histogram glyph, identity scale, automatic bin count, and left-closed bins.
fn default() -> Self {
Self {
title: None,
xlabel: None,
ylabel: None,
border: BorderType::Barplot,
margin: Plot::<BarplotGraphics>::DEFAULT_MARGIN,
padding: Plot::<BarplotGraphics>::DEFAULT_PADDING,
labels: true,
color: TermColor::Named(NamedColor::Green),
width: 40,
symbol: Some(DEFAULT_HISTOGRAM_SYMBOL),
xscale: Scale::Identity,
// `None` lets `build_histogram` choose the bin count from the sample size.
nbins: None,
closed: ClosedInterval::Left,
}
}
}
/// Errors returned by [`histogram`].
#[derive(Debug, Error, PartialEq)]
#[non_exhaustive]
pub enum HistogramError {
/// The input slice contained no values.
#[error("histogram data cannot be empty")]
EmptyData,
/// `nbins` was explicitly set to `Some(0)`.
#[error("nbins must be greater than 0")]
InvalidBinCount,
/// A value's string form did not parse as `f64`, or parsed to a non-finite number
/// (NaN or infinity). `value` holds the offending string form.
#[error("invalid numeric value: {value}")]
InvalidNumericValue { value: String },
/// An error reported by the underlying bar plot, passed through unchanged.
#[error(transparent)]
Barplot(#[from] BarplotError),
}
pub fn histogram<V: ToString>(
data: &[V],
options: HistogramOptions,
) -> Result<Plot<BarplotGraphics>, HistogramError> {
if data.is_empty() {
return Err(HistogramError::EmptyData);
}
if matches!(options.nbins, Some(0)) {
return Err(HistogramError::InvalidBinCount);
}
let values = parse_data(data)?;
let (labels, counts) = build_histogram(&values, options.nbins, options.closed);
let xlabel = options
.xlabel
.unwrap_or_else(|| transformed_frequency_label(options.xscale));
let barplot_options = BarplotOptions {
title: options.title,
xlabel: Some(xlabel),
ylabel: options.ylabel,
border: options.border,
margin: options.margin,
padding: options.padding,
labels: options.labels,
color: options.color,
width: options.width,
symbol: options.symbol,
xscale: options.xscale,
};
barplot(&labels, &counts, barplot_options).map_err(HistogramError::from)
}
/// Converts every input value to `f64` via its string representation.
///
/// Stops at the first value that does not parse, or that parses to NaN or an infinity, and
/// reports it as [`HistogramError::InvalidNumericValue`] carrying the value's string form.
fn parse_data<V: ToString>(data: &[V]) -> Result<Vec<f64>, HistogramError> {
    let mut parsed = Vec::with_capacity(data.len());
    for item in data {
        let text = item.to_string();
        match text.parse::<f64>() {
            Ok(number) if number.is_finite() => parsed.push(number),
            // Both parse failures and non-finite results report the original text.
            _ => return Err(HistogramError::InvalidNumericValue { value: text }),
        }
    }
    Ok(parsed)
}
/// Bins `data` and returns one label and one count per bin.
///
/// The bin count defaults to `sturges_bins(data.len())` when `nbins` is `None`. The raw width
/// (data range divided by `bins - 1`, or the full range for a single bin) is rounded to a
/// "nice" value, and the outer edges are snapped outward to multiples of that width, so the
/// number of bins produced can differ from the number requested.
///
/// `closed` decides which bin receives a value that lies exactly on an interior edge. Values
/// on the outermost edges are always counted in the first or last bin.
fn build_histogram(
    data: &[f64],
    nbins: Option<usize>,
    closed: ClosedInterval,
) -> (Vec<String>, Vec<usize>) {
    let bins = nbins.unwrap_or_else(|| sturges_bins(data.len()));
    let (min_value, max_value) = minmax(data);
    let raw_width = if bins > 1 {
        (max_value - min_value) / usize_to_f64(bins - 1)
    } else {
        max_value - min_value
    };
    let mut bin_width = rounded_bin_width(raw_width);
    if !bin_width.is_finite() || bin_width <= 0.0 {
        bin_width = 1.0;
    }

    let lower = round_down_to_step(min_value, bin_width);
    let mut upper = round_up_to_step(max_value, bin_width);
    if upper <= lower {
        // Degenerate range (e.g. all values equal): force a single bin of one width.
        upper = lower + bin_width;
    }

    let mut edges = vec![lower];
    let mut last = lower;
    loop {
        let next = last + bin_width;
        // Stop at the upper edge. Also stop when the addition makes no progress: if
        // `bin_width` is below the floating-point resolution of `last` (for example when the
        // data range overflowed to infinity and the width fell back to 1.0), `next == last`
        // and the loop would otherwise push edges forever.
        if next >= upper || next <= last {
            break;
        }
        edges.push(next);
        last = next;
    }
    edges.push(upper);

    let mut counts = vec![0usize; edges.len().saturating_sub(1)];
    for value in data {
        // `partition_point` counts the edges at or below (Left) / strictly below (Right) the
        // value; subtracting one turns that into the index of the bin's lower edge.
        let mut index = match closed {
            ClosedInterval::Left => edges
                .partition_point(|edge| *edge <= *value)
                .saturating_sub(1),
            ClosedInterval::Right => edges
                .partition_point(|edge| *edge < *value)
                .saturating_sub(1),
        };
        if let Some(last_index) = counts.len().checked_sub(1) {
            // A value on the upper edge would index one past the end; keep it in the last bin.
            index = index.min(last_index);
            counts[index] += 1;
        }
    }

    let labels = histogram_labels(&edges, bin_width, closed);
    (labels, counts)
}
/// Builds one interval label per bin, e.g. `[-1.0,  0.5)`.
///
/// Edges are rounded relative to `bin_width` and then padded so that the decimal points of all
/// edges line up: spaces are added before the integer part and after the fractional part up to
/// the widest edge. The brackets follow `closed`.
fn histogram_labels(edges: &[f64], bin_width: f64, closed: ClosedInterval) -> Vec<String> {
    let rounded: Vec<f64> = edges
        .iter()
        .map(|&edge| float_round_log10(edge, bin_width))
        .collect();

    // Widest integer part and widest fractional part over all edges.
    let (integer_width, fraction_width) =
        rounded
            .iter()
            .fold((0usize, 0usize), |(widest_int, widest_frac), &edge| {
                let (int_len, frac_len) = split_widths(&format_float_like_ruby(edge));
                (widest_int.max(int_len), widest_frac.max(frac_len))
            });

    let (opening, closing) = match closed {
        ClosedInterval::Left => ('[', ')'),
        ClosedInterval::Right => ('(', ']'),
    };

    // Renders one edge padded on both sides to the common widths.
    let aligned = |edge: f64| -> String {
        let text = format_float_like_ruby(edge);
        let (int_len, frac_len) = split_widths(&text);
        format!(
            "{}{}{}",
            " ".repeat(integer_width.saturating_sub(int_len)),
            text,
            " ".repeat(fraction_width.saturating_sub(frac_len)),
        )
    };

    rounded
        .windows(2)
        .map(|pair| {
            let start = pair[0];
            // For fine-grained bins ending at zero, the upper edge is shown as `-0.0`.
            let end = if pair[1] == 0.0 && start < 0.0 && bin_width < 0.01 {
                -0.0
            } else {
                pair[1]
            };
            format!("{}{}, {}{}", opening, aligned(start), aligned(end), closing)
        })
        .collect()
}
/// Returns the default x-axis label for `scale`: `"Frequency"`, followed by the scale name in
/// brackets for logarithmic scales.
fn transformed_frequency_label(scale: Scale) -> String {
    let label = match scale {
        Scale::Identity => "Frequency",
        Scale::Ln => "Frequency [ln]",
        Scale::Log2 => "Frequency [log2]",
        Scale::Log10 => "Frequency [log10]",
    };
    label.to_owned()
}
/// Returns the default bin count for `sample_size` samples.
///
/// The result is one plus the number of times 1 must be doubled to reach or exceed
/// `sample_size` (that is, `ceil(log2(n)) + 1`); sample sizes of 0 and 1 yield a single bin.
fn sturges_bins(sample_size: usize) -> usize {
    // Powers of two 1, 2, 4, ... (saturating at `usize::MAX`) that are still below the sample
    // size; each one corresponds to one doubling step.
    let doublings = std::iter::successors(Some(1usize), |power| Some(power.saturating_mul(2)))
        .take_while(|&power| power < sample_size)
        .count();
    doublings + 1
}
/// Rounds `raw_width` to a "nice" bin width.
///
/// The width is split into a power-of-ten magnitude and a mantissa; the mantissa is replaced by
/// the closest entry of `HISTOGRAM_CLOSE_FACTORS` (the earliest entry wins ties). Non-finite or
/// non-positive widths yield `1.0`.
fn rounded_bin_width(raw_width: f64) -> f64 {
    let usable = raw_width.is_finite() && raw_width > 0.0;
    if !usable {
        return 1.0;
    }

    let exponent = raw_width.log10().floor();
    let magnitude = 10f64.powf(exponent);
    let mantissa = raw_width / magnitude;

    let mut candidates = HISTOGRAM_CLOSE_FACTORS.iter().copied();
    let Some(mut nearest) = candidates.next() else {
        // No candidates: behave as if the factor were 1.0.
        return magnitude;
    };
    let mut nearest_gap = (mantissa - nearest).abs();
    for candidate in candidates {
        let gap = (mantissa - candidate).abs();
        // Strict comparison keeps the earlier candidate on ties and on incomparable gaps.
        if gap < nearest_gap {
            nearest = candidate;
            nearest_gap = gap;
        }
    }
    nearest * magnitude
}
/// Returns the largest multiple of `step` that does not exceed `value`.
fn round_down_to_step(value: f64, step: f64) -> f64 {
    let whole_steps = (value / step).floor();
    whole_steps * step
}
/// Returns the smallest multiple of `step` that is not below `value`.
fn round_up_to_step(value: f64, step: f64) -> f64 {
    let whole_steps = (value / step).ceil();
    whole_steps * step
}
/// Rounds `value` to a number of decimal digits derived from `magnitude`.
///
/// The digit count is `ceil_neg_log10(magnitude) + 1`. Rounding is applied to the absolute
/// value and the original sign is restored afterwards, so it is symmetric around zero. Zero is
/// returned unchanged, including its sign.
fn float_round_log10(value: f64, magnitude: f64) -> f64 {
    if value == 0.0 {
        return 0.0_f64.copysign(value);
    }
    let digits = ceil_neg_log10(magnitude) + 1;
    round_to_digits(value.abs(), digits).copysign(value)
}
/// Rounds `value` to `digits` decimal places.
///
/// A negative `digits` rounds to the left of the decimal point (e.g. `-2` rounds to hundreds).
fn round_to_digits(value: f64, digits: i32) -> f64 {
    if digits < 0 {
        let step = 10f64.powi(-digits);
        return (value / step).round() * step;
    }
    let factor = 10f64.powi(digits);
    (value * factor).round() / factor
}
/// Formats `value` so that it always shows a fractional part.
///
/// Zero renders as `"0.0"` or `"-0.0"` depending on its sign. Any other value uses the default
/// `f64` formatting, with `".0"` appended when the text has neither a decimal point nor an
/// exponent marker.
fn format_float_like_ruby(value: f64) -> String {
    if value == 0.0 {
        let zero = if value.is_sign_negative() {
            "-0.0"
        } else {
            "0.0"
        };
        return zero.to_owned();
    }

    let rendered = value.to_string();
    let has_fraction_or_exponent = rendered
        .chars()
        .any(|symbol| matches!(symbol, '.' | 'e' | 'E'));
    if has_fraction_or_exponent {
        rendered
    } else {
        format!("{rendered}.0")
    }
}
/// Returns the character counts before and after the first `.` in `text`.
///
/// Text without a `.` is reported as `(total length, 0)`.
fn split_widths(text: &str) -> (usize, usize) {
    match text.split_once('.') {
        Some((integer, fraction)) => (integer.chars().count(), fraction.chars().count()),
        None => (text.chars().count(), 0),
    }
}
#[cfg(test)]
mod tests {
    use std::fs;

    use super::{ClosedInterval, HistogramError, HistogramOptions, histogram};
    use crate::color::{NamedColor, TermColor};
    use crate::parse_border_type;
    use crate::test_util::{assert_fixture_eq, render_plot_text};

    /// Loads the shared sample data set (one `f64` per line) from the fixtures directory.
    fn fixture_data() -> Vec<f64> {
        let path = format!(
            "{}/tests/fixtures/data/randn.txt",
            env!("CARGO_MANIFEST_DIR")
        );
        let text = fs::read_to_string(path).expect("randn fixture should load");
        text.lines()
            .map(|line| line.parse::<f64>().expect("line must parse as f64"))
            .collect()
    }

    /// An unknown border name is reported as `BarplotError::UnknownBorderType`.
    #[test]
    fn errors_for_unknown_border_name() {
        let err =
            parse_border_type("invalid_border_name").expect_err("unknown border name should fail");
        assert_eq!(
            err,
            crate::BarplotError::UnknownBorderType {
                name: String::from("invalid_border_name")
            }
        );
    }

    /// Default options match the stored fixtures for both render modes.
    #[test]
    fn default_and_nocolor_fixtures() {
        let data = fixture_data();
        let plot = histogram(&data, HistogramOptions::default()).expect("histogram should succeed");
        assert_fixture_eq(
            &render_plot_text(&plot, true),
            "tests/fixtures/histogram/default.txt",
        );
        assert_fixture_eq(
            &render_plot_text(&plot, false),
            "tests/fixtures/histogram/default_nocolor.txt",
        );
    }

    /// Scaling the data by 100 and by 0.01 exercises bin-edge rounding at other magnitudes.
    #[test]
    fn scaled_data_fixtures() {
        let data = fixture_data();
        let scaled_up: Vec<f64> = data.iter().map(|value| value * 100.0).collect();
        let scaled_down: Vec<f64> = data.iter().map(|value| value * 0.01).collect();
        let up_plot =
            histogram(&scaled_up, HistogramOptions::default()).expect("histogram should succeed");
        assert_fixture_eq(
            &render_plot_text(&up_plot, true),
            "tests/fixtures/histogram/default_1e2.txt",
        );
        let down_plot =
            histogram(&scaled_down, HistogramOptions::default()).expect("histogram should succeed");
        assert_fixture_eq(
            &render_plot_text(&down_plot, true),
            "tests/fixtures/histogram/default_1e-2.txt",
        );
    }

    /// A log10 scale changes the default axis label; an explicit label overrides it.
    #[test]
    fn logscale_and_custom_label_fixtures() {
        let data = fixture_data();
        let log_plot = histogram(
            &data,
            HistogramOptions {
                xscale: crate::canvas::Scale::Log10,
                ..HistogramOptions::default()
            },
        )
        .expect("histogram should succeed");
        assert_fixture_eq(
            &render_plot_text(&log_plot, true),
            "tests/fixtures/histogram/log10.txt",
        );
        let custom_plot = histogram(
            &data,
            HistogramOptions {
                xscale: crate::canvas::Scale::Log10,
                xlabel: Some(String::from("custom label")),
                ..HistogramOptions::default()
            },
        )
        .expect("histogram should succeed");
        assert_fixture_eq(
            &render_plot_text(&custom_plot, true),
            "tests/fixtures/histogram/log10_label.txt",
        );
    }

    /// An explicit bin count combined with right-closed intervals matches its fixture.
    #[test]
    fn explicit_bins_and_right_closed_fixture() {
        let data = fixture_data();
        let plot = histogram(
            &data,
            HistogramOptions {
                nbins: Some(5),
                closed: ClosedInterval::Right,
                ..HistogramOptions::default()
            },
        )
        .expect("histogram should succeed");
        assert_fixture_eq(
            &render_plot_text(&plot, true),
            "tests/fixtures/histogram/hist_params.txt",
        );
    }

    /// Title, label, color, margin, padding, border, symbol and width options match fixtures.
    #[test]
    fn parameterized_fixtures() {
        let data = fixture_data();
        let parameters1 = histogram(
            &data,
            HistogramOptions {
                title: Some(String::from("My Histogram")),
                xlabel: Some(String::from("Absolute Frequency")),
                color: TermColor::Named(NamedColor::Blue),
                margin: 7,
                padding: 3,
                ..HistogramOptions::default()
            },
        )
        .expect("histogram should succeed");
        assert_fixture_eq(
            &render_plot_text(&parameters1, true),
            "tests/fixtures/histogram/parameters1.txt",
        );
        let parameters1_nolabels = histogram(
            &data,
            HistogramOptions {
                title: Some(String::from("My Histogram")),
                xlabel: Some(String::from("Absolute Frequency")),
                color: TermColor::Named(NamedColor::Blue),
                margin: 7,
                padding: 3,
                labels: false,
                ..HistogramOptions::default()
            },
        )
        .expect("histogram should succeed");
        assert_fixture_eq(
            &render_plot_text(&parameters1_nolabels, true),
            "tests/fixtures/histogram/parameters1_nolabels.txt",
        );
        let parameters2 = histogram(
            &data,
            HistogramOptions {
                title: Some(String::from("My Histogram")),
                xlabel: Some(String::from("Absolute Frequency")),
                color: TermColor::Named(NamedColor::Yellow),
                border: crate::border::BorderType::Solid,
                symbol: Some('='),
                width: 50,
                ..HistogramOptions::default()
            },
        )
        .expect("histogram should succeed");
        assert_fixture_eq(
            &render_plot_text(&parameters2, true),
            "tests/fixtures/histogram/parameters2.txt",
        );
    }

    /// Two integer samples must produce a plot rather than an error.
    #[test]
    fn issue_24_regression() {
        let result = histogram(&[1, 2], HistogramOptions::default());
        assert!(result.is_ok(), "histogram([1, 2]) should not error");
    }

    /// Empty data, a zero bin count, unparsable text and non-finite values are all rejected.
    #[test]
    fn validates_inputs() {
        let empty = histogram::<f64>(&[], HistogramOptions::default());
        assert!(matches!(empty, Err(HistogramError::EmptyData)));
        let invalid_bins = histogram(
            &[1.0, 2.0],
            HistogramOptions {
                nbins: Some(0),
                ..HistogramOptions::default()
            },
        );
        assert!(matches!(invalid_bins, Err(HistogramError::InvalidBinCount)));
        let parse_error = histogram(&["abc"], HistogramOptions::default());
        assert!(matches!(
            parse_error,
            Err(HistogramError::InvalidNumericValue { .. })
        ));
        let non_finite = histogram(&["NaN"], HistogramOptions::default());
        assert!(matches!(
            non_finite,
            Err(HistogramError::InvalidNumericValue { .. })
        ));
    }
}