Skip to main content

lmn_core/stats/
distribution.rs

1// ── Distribution ──────────────────────────────────────────────────────────────
2
3/// Owns a sorted snapshot of observed values and answers arbitrary quantile queries.
4///
5/// Construction sorts the input once; queries are O(1) index lookups.
6/// Input is `f64` throughout — Duration callers convert to milliseconds at the call site.
7pub struct Distribution {
8    sorted: Vec<f64>,
9}
10
11impl Distribution {
12    /// Sorts `values` and takes ownership. O(n log n).
13    ///
14    /// Use `from_sorted` when values are already sorted to skip the sort step.
15    pub fn from_unsorted(mut values: Vec<f64>) -> Self {
16        values.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
17        Self { sorted: values }
18    }
19
20    /// Constructs a `Distribution` from an already-sorted `Vec<f64>`.
21    ///
22    /// Caller is responsible for ensuring the input is sorted in ascending order.
23    /// No sort is performed; construction is O(1).
24    pub fn from_sorted(values: Vec<f64>) -> Self {
25        Self { sorted: values }
26    }
27
28    /// Returns the value at quantile `p` in `[0.0, 1.0]` using the floor-index formula.
29    ///
30    /// `idx = (n as f64 * p).floor() as usize`, clamped to `[0, n-1]`.
31    /// Returns `0.0` for an empty distribution.
32    ///
33    /// This formula is equivalent to the integer formula `n * p / 100` (for integer
34    /// percentile values 0–100) used in the ASCII table renderer, ensuring both
35    /// output paths report identical percentile values.
36    pub fn quantile(&self, p: f64) -> f64 {
37        if self.sorted.is_empty() {
38            return 0.0;
39        }
40        let n = self.sorted.len();
41        let idx = ((n as f64 * p).floor() as usize).min(n - 1);
42        self.sorted[idx]
43    }
44
45    /// Returns the minimum value, or `0.0` for an empty distribution.
46    pub fn min(&self) -> f64 {
47        self.sorted.first().copied().unwrap_or(0.0)
48    }
49
50    /// Returns the maximum value, or `0.0` for an empty distribution.
51    pub fn max(&self) -> f64 {
52        self.sorted.last().copied().unwrap_or(0.0)
53    }
54
55    /// Returns the arithmetic mean, or `0.0` for an empty distribution.
56    pub fn mean(&self) -> f64 {
57        if self.sorted.is_empty() {
58            return 0.0;
59        }
60        self.sorted.iter().sum::<f64>() / self.sorted.len() as f64
61    }
62
63    /// Returns `true` if the distribution contains no values.
64    pub fn is_empty(&self) -> bool {
65        self.sorted.is_empty()
66    }
67
68    /// Returns the number of values in the distribution.
69    pub fn len(&self) -> usize {
70        self.sorted.len()
71    }
72
73    /// Returns the value at a pre-computed index into the sorted backing array.
74    ///
75    /// Use this when the caller has already computed the index via the integer formula
76    /// `(n * p / 100).min(n - 1)` and wants to avoid the redundant float round-trip
77    /// through `quantile(idx as f64 / n as f64)`.
78    ///
79    /// # Panics
80    /// Panics if `idx >= self.len()`. The caller is responsible for clamping the index
81    /// to `[0, n-1]` before calling this method.
82    pub fn value_at(&self, idx: usize) -> f64 {
83        self.sorted[idx]
84    }
85}
86
87// ── Tests ─────────────────────────────────────────────────────────────────────
88
89#[cfg(test)]
90mod tests {
91    use super::*;
92
93    // ── Distribution ──────────────────────────────────────────────────────────
94
95    #[test]
96    fn distribution_quantile_known_values() {
97        // 100-element uniform distribution: 1.0, 2.0, ..., 100.0
98        let values: Vec<f64> = (1..=100).map(|i| i as f64).collect();
99        let dist = Distribution::from_sorted(values);
100
101        // quantile(0.0): idx = floor(100 * 0.0) = 0 → value 1.0
102        assert_eq!(dist.quantile(0.0), 1.0);
103        // quantile(0.5): idx = floor(100 * 0.5) = 50 → value 51.0
104        assert_eq!(dist.quantile(0.5), 51.0);
105        // quantile(0.99): idx = floor(100 * 0.99) = 99 → value 100.0
106        assert_eq!(dist.quantile(0.99), 100.0);
107        // quantile(1.0): idx = floor(100 * 1.0) = 100, clamped to 99 → value 100.0
108        assert_eq!(dist.quantile(1.0), 100.0);
109    }
110
111    #[test]
112    fn distribution_empty_returns_zero() {
113        let dist = Distribution::from_sorted(vec![]);
114        assert_eq!(dist.quantile(0.5), 0.0);
115        assert_eq!(dist.min(), 0.0);
116        assert_eq!(dist.max(), 0.0);
117        assert_eq!(dist.mean(), 0.0);
118    }
119
120    #[test]
121    fn distribution_single_element() {
122        let dist = Distribution::from_sorted(vec![42.0]);
123        assert_eq!(dist.quantile(0.0), 42.0);
124        assert_eq!(dist.quantile(0.5), 42.0);
125        assert_eq!(dist.quantile(1.0), 42.0);
126        assert_eq!(dist.min(), 42.0);
127        assert_eq!(dist.max(), 42.0);
128        assert_eq!(dist.mean(), 42.0);
129    }
130
131    #[test]
132    fn distribution_quantile_boundary_p100() {
133        let values: Vec<f64> = (1..=10).map(|i| i as f64).collect();
134        let dist = Distribution::from_sorted(values);
135        // Must not panic — must return last element
136        assert_eq!(dist.quantile(1.0), 10.0);
137    }
138
139    #[test]
140    fn distribution_avg_is_arithmetic_mean() {
141        let dist = Distribution::from_sorted(vec![1.0, 2.0, 3.0]);
142        assert_eq!(dist.mean(), 2.0);
143    }
144
145    #[test]
146    fn distribution_is_sorted_on_construction() {
147        // Unsorted input: [5.0, 1.0, 3.0, 2.0, 4.0]
148        let dist = Distribution::from_unsorted(vec![5.0, 1.0, 3.0, 2.0, 4.0]);
149        assert_eq!(dist.min(), 1.0);
150        assert_eq!(dist.max(), 5.0);
151        // quantile(0.5): idx = floor(5 * 0.5) = 2 → value 3.0
152        assert_eq!(dist.quantile(0.5), 3.0);
153    }
154
155    #[test]
156    fn distribution_len_and_is_empty() {
157        let empty = Distribution::from_sorted(vec![]);
158        assert!(empty.is_empty());
159        assert_eq!(empty.len(), 0);
160
161        let dist = Distribution::from_sorted(vec![1.0, 2.0, 3.0]);
162        assert!(!dist.is_empty());
163        assert_eq!(dist.len(), 3);
164    }
165
166    #[test]
167    fn distribution_value_at_returns_correct_element() {
168        // sorted: [10.0, 20.0, 30.0, 40.0, 50.0]
169        let dist = Distribution::from_sorted(vec![10.0, 20.0, 30.0, 40.0, 50.0]);
170        assert_eq!(dist.value_at(0), 10.0);
171        assert_eq!(dist.value_at(2), 30.0);
172        assert_eq!(dist.value_at(4), 50.0);
173    }
174}