Skip to main content

use_distribution/
lib.rs

//! Basic distribution summaries for `f64` slices.
//!
//! The helpers cover minimum and maximum lookup, range calculation, frequency
//! counts, and a small equal-width histogram representation.
//!
//! # Examples
//!
//! ```rust
//! use use_distribution::{frequency_counts, histogram, range};
//!
//! assert_eq!(range(&[1.0, 3.0, 5.0]).unwrap(), 4.0);
//! assert_eq!(frequency_counts(&[1.0, 1.0, 2.0]).unwrap(), vec![(1.0, 2), (2.0, 1)]);
//! assert_eq!(histogram(&[1.0, 2.0, 2.0, 3.0, 4.0], 3).unwrap().len(), 3);
//! ```
15
16use core::cmp::Ordering;
17
/// One bucket of a histogram: a pair of bounds and the number of values
/// counted between them.
#[derive(Debug, Clone, PartialEq)]
pub struct HistogramBucket {
    /// Lower bound of the bucket.
    pub start: f64,
    /// Upper bound of the bucket.
    pub end: f64,
    /// Number of input values counted in this bucket.
    pub count: usize,
}
24
/// Reasons a distribution summary cannot be computed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DistributionError {
    /// The input slice contained no values.
    EmptyInput,
    /// A histogram was requested with zero buckets.
    InvalidBucketCount,
}

impl core::fmt::Display for DistributionError {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let message = match self {
            Self::EmptyInput => "input slice is empty",
            Self::InvalidBucketCount => "bucket count must be greater than zero",
        };
        f.write_str(message)
    }
}

// Implementing `Error` lets callers propagate this type with `?` into
// `Box<dyn Error>` and similar catch-all error types.
impl std::error::Error for DistributionError {}
30
31pub fn min(values: &[f64]) -> Result<f64, DistributionError> {
32    values
33        .iter()
34        .copied()
35        .min_by(f64::total_cmp)
36        .ok_or(DistributionError::EmptyInput)
37}
38
39pub fn max(values: &[f64]) -> Result<f64, DistributionError> {
40    values
41        .iter()
42        .copied()
43        .max_by(f64::total_cmp)
44        .ok_or(DistributionError::EmptyInput)
45}
46
47pub fn range(values: &[f64]) -> Result<f64, DistributionError> {
48    Ok(max(values)? - min(values)?)
49}
50
51pub fn frequency_counts(values: &[f64]) -> Result<Vec<(f64, usize)>, DistributionError> {
52    if values.is_empty() {
53        return Err(DistributionError::EmptyInput);
54    }
55
56    let mut sorted = values.to_vec();
57    sorted.sort_by(f64::total_cmp);
58
59    let mut counts: Vec<(f64, usize)> = Vec::new();
60    for value in sorted {
61        match counts.last_mut() {
62            Some((existing, count)) if existing.total_cmp(&value) == Ordering::Equal => *count += 1,
63            _ => counts.push((value, 1)),
64        }
65    }
66
67    Ok(counts)
68}
69
70pub fn histogram(
71    values: &[f64],
72    bucket_count: usize,
73) -> Result<Vec<HistogramBucket>, DistributionError> {
74    if values.is_empty() {
75        return Err(DistributionError::EmptyInput);
76    }
77
78    if bucket_count == 0 {
79        return Err(DistributionError::InvalidBucketCount);
80    }
81
82    let min_value = min(values)?;
83    let max_value = max(values)?;
84
85    if min_value == max_value {
86        return Ok(vec![HistogramBucket {
87            start: min_value,
88            end: max_value,
89            count: values.len(),
90        }]);
91    }
92
93    let width = (max_value - min_value) / bucket_count as f64;
94    let mut buckets = (0..bucket_count)
95        .map(|index| HistogramBucket {
96            start: min_value + index as f64 * width,
97            end: if index + 1 == bucket_count {
98                max_value
99            } else {
100                min_value + (index + 1) as f64 * width
101            },
102            count: 0,
103        })
104        .collect::<Vec<_>>();
105
106    for value in values {
107        let raw_index = ((value - min_value) / width).floor() as usize;
108        let index = raw_index.min(bucket_count - 1);
109        buckets[index].count += 1;
110    }
111
112    Ok(buckets)
113}
114
#[cfg(test)]
mod tests {
    use super::{frequency_counts, histogram, max, min, range, DistributionError, HistogramBucket};

    /// Shorthand for an expected bucket, keeping histogram expectations compact.
    fn bucket(start: f64, end: f64, count: usize) -> HistogramBucket {
        HistogramBucket { start, end, count }
    }

    #[test]
    fn computes_min_max_and_range() {
        let sample = [4.0, 1.0, 7.0, 3.0];

        assert_eq!(min(&sample), Ok(1.0));
        assert_eq!(max(&sample), Ok(7.0));
        assert_eq!(range(&sample), Ok(6.0));
    }

    #[test]
    fn computes_frequency_counts() {
        let counts = frequency_counts(&[3.0, 1.0, 3.0, 2.0, 1.0, 3.0]);

        assert_eq!(counts, Ok(vec![(1.0, 2), (2.0, 1), (3.0, 3)]));
    }

    #[test]
    fn computes_histogram_buckets() {
        let expected = vec![
            bucket(1.0, 2.0, 1),
            bucket(2.0, 3.0, 2),
            bucket(3.0, 4.0, 2),
        ];

        assert_eq!(histogram(&[1.0, 2.0, 2.0, 3.0, 4.0], 3), Ok(expected));
    }

    #[test]
    fn handles_single_value_inputs() {
        let single = [5.0];

        assert_eq!(range(&single), Ok(0.0));
        // A lone value has no span to divide, so one bucket comes back even
        // though four were requested.
        assert_eq!(histogram(&single, 4), Ok(vec![bucket(5.0, 5.0, 1)]));
    }

    #[test]
    fn rejects_invalid_inputs() {
        let empty: [f64; 0] = [];

        assert_eq!(min(&empty), Err(DistributionError::EmptyInput));
        assert_eq!(max(&empty), Err(DistributionError::EmptyInput));
        assert_eq!(range(&empty), Err(DistributionError::EmptyInput));
        assert_eq!(frequency_counts(&empty), Err(DistributionError::EmptyInput));
        assert_eq!(histogram(&empty, 3), Err(DistributionError::EmptyInput));

        let zero_buckets = histogram(&[1.0, 2.0], 0);
        assert_eq!(zero_buckets, Err(DistributionError::InvalidBucketCount));
    }
}