1use core::cmp::Ordering;
17
/// One bin of a histogram, as produced by [`histogram`].
///
/// `histogram` assigns each input value to the bucket whose index is
/// `floor((value - min) / width)`, clamped to the last bucket, so every bucket
/// but the last behaves as the half-open interval `[start, end)` and the last
/// bucket additionally receives the maximum input value.
#[derive(Debug, Clone, PartialEq)]
pub struct HistogramBucket {
    /// Lower bound of the bucket.
    pub start: f64,
    /// Upper bound of the bucket. For the final bucket `histogram` stores the
    /// input maximum itself rather than a computed multiple of the width.
    pub end: f64,
    /// Number of input values assigned to this bucket.
    pub count: usize,
}
24
/// Reasons a distribution computation in this module can be rejected.
///
/// Implements [`std::fmt::Display`] and [`std::error::Error`] so callers can
/// print it or propagate it with `?` into `Box<dyn std::error::Error>`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DistributionError {
    /// The input slice contained no values.
    EmptyInput,
    /// A histogram was requested with zero buckets.
    InvalidBucketCount,
}

impl std::fmt::Display for DistributionError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Deliberately exhaustive (no `_` arm): adding a variant must force a
        // decision about its user-facing message.
        let message = match self {
            Self::EmptyInput => "input contains no values",
            Self::InvalidBucketCount => "bucket count must be greater than zero",
        };
        f.write_str(message)
    }
}

impl std::error::Error for DistributionError {}
30
31pub fn min(values: &[f64]) -> Result<f64, DistributionError> {
32 values
33 .iter()
34 .copied()
35 .min_by(f64::total_cmp)
36 .ok_or(DistributionError::EmptyInput)
37}
38
39pub fn max(values: &[f64]) -> Result<f64, DistributionError> {
40 values
41 .iter()
42 .copied()
43 .max_by(f64::total_cmp)
44 .ok_or(DistributionError::EmptyInput)
45}
46
47pub fn range(values: &[f64]) -> Result<f64, DistributionError> {
48 Ok(max(values)? - min(values)?)
49}
50
51pub fn frequency_counts(values: &[f64]) -> Result<Vec<(f64, usize)>, DistributionError> {
52 if values.is_empty() {
53 return Err(DistributionError::EmptyInput);
54 }
55
56 let mut sorted = values.to_vec();
57 sorted.sort_by(f64::total_cmp);
58
59 let mut counts: Vec<(f64, usize)> = Vec::new();
60 for value in sorted {
61 match counts.last_mut() {
62 Some((existing, count)) if existing.total_cmp(&value) == Ordering::Equal => *count += 1,
63 _ => counts.push((value, 1)),
64 }
65 }
66
67 Ok(counts)
68}
69
70pub fn histogram(
71 values: &[f64],
72 bucket_count: usize,
73) -> Result<Vec<HistogramBucket>, DistributionError> {
74 if values.is_empty() {
75 return Err(DistributionError::EmptyInput);
76 }
77
78 if bucket_count == 0 {
79 return Err(DistributionError::InvalidBucketCount);
80 }
81
82 let min_value = min(values)?;
83 let max_value = max(values)?;
84
85 if min_value == max_value {
86 return Ok(vec![HistogramBucket {
87 start: min_value,
88 end: max_value,
89 count: values.len(),
90 }]);
91 }
92
93 let width = (max_value - min_value) / bucket_count as f64;
94 let mut buckets = (0..bucket_count)
95 .map(|index| HistogramBucket {
96 start: min_value + index as f64 * width,
97 end: if index + 1 == bucket_count {
98 max_value
99 } else {
100 min_value + (index + 1) as f64 * width
101 },
102 count: 0,
103 })
104 .collect::<Vec<_>>();
105
106 for value in values {
107 let raw_index = ((value - min_value) / width).floor() as usize;
108 let index = raw_index.min(bucket_count - 1);
109 buckets[index].count += 1;
110 }
111
112 Ok(buckets)
113}
114
#[cfg(test)]
mod tests {
    use super::{frequency_counts, histogram, max, min, range, DistributionError, HistogramBucket};

    /// Shorthand so each expected bucket fits on one line.
    fn bucket(start: f64, end: f64, count: usize) -> HistogramBucket {
        HistogramBucket { start, end, count }
    }

    #[test]
    fn computes_min_max_and_range() {
        let samples = [4.0, 1.0, 7.0, 3.0];

        assert_eq!(min(&samples), Ok(1.0));
        assert_eq!(max(&samples), Ok(7.0));
        assert_eq!(range(&samples), Ok(6.0));
    }

    #[test]
    fn computes_frequency_counts() {
        let samples = [3.0, 1.0, 3.0, 2.0, 1.0, 3.0];
        let expected = vec![(1.0, 2), (2.0, 1), (3.0, 3)];

        assert_eq!(frequency_counts(&samples), Ok(expected));
    }

    #[test]
    fn computes_histogram_buckets() {
        let samples = [1.0, 2.0, 2.0, 3.0, 4.0];
        let expected = vec![
            bucket(1.0, 2.0, 1),
            bucket(2.0, 3.0, 2),
            bucket(3.0, 4.0, 2),
        ];

        assert_eq!(histogram(&samples, 3), Ok(expected));
    }

    #[test]
    fn handles_single_value_inputs() {
        let lone = [5.0];

        assert_eq!(range(&lone), Ok(0.0));
        // A zero-width span yields one bucket even though four were requested.
        assert_eq!(histogram(&lone, 4), Ok(vec![bucket(5.0, 5.0, 1)]));
    }

    #[test]
    fn rejects_invalid_inputs() {
        let nothing: [f64; 0] = [];

        assert_eq!(min(&nothing), Err(DistributionError::EmptyInput));
        assert_eq!(max(&nothing), Err(DistributionError::EmptyInput));
        assert_eq!(range(&nothing), Err(DistributionError::EmptyInput));
        assert_eq!(
            frequency_counts(&nothing),
            Err(DistributionError::EmptyInput)
        );
        assert_eq!(histogram(&nothing, 3), Err(DistributionError::EmptyInput));
        assert_eq!(
            histogram(&[1.0, 2.0], 0),
            Err(DistributionError::InvalidBucketCount)
        );
    }
}